Contents

1 Introduction

Biocpkg("BloodCancerMultiOmics2017") is a multi-omic dataset comprising genome, transcriptome, DNA methylome data together with data from the ex vivo drug sensitivity screen of the primary blood tumor samples.


In this vignette we present the analysis of the Primary Blood Cancer Encyclopedia (PACE) project and source code for the paper


Drug-perturbation-based stratification of blood cancer

Sascha Dietrich*, Małgorzata Oleś*, Junyan Lu*, Leopold Sellner, Simon Anders, Britta Velten, Bian Wu, Jennifer Hüllein, Michelle da Silva Liberio, Tatjana Walther, Lena Wagner, Sophie Rabe, Sonja Ghidelli-Disse, Marcus Bantscheff, Andrzej K. Oleś, Mikołaj Słabicki, Andreas Mock, Christopher C. Oakes, Shihui Wang, Sina Oppermann, Marina Lukas, Vladislav Kim, Martin Sill, Axel Benner, Anna Jauch, Lesley Ann Sutton, Emma Young, Richard Rosenquist, Xiyang Liu, Alexander Jethwa, Kwang Seok Lee, Joe Lewis, Kerstin Putzker, Christoph Lutz, Davide Rossi, Andriy Mokhir, Thomas Oellerich, Katja Zirlik, Marco Herling, Florence Nguyen-Khac, Christoph Plass, Emma Andersson, Satu Mustjoki, Christof von Kalle, Anthony D. Ho, Manfred Hensel, Jan Dürig, Ingo Ringshausen, Marc Zapatka, Wolfgang Huber and Thorsten Zenz
J. Clin. Invest. (2018); 128(1):427–445. doi:10.1172/JCI93801.

The presented analysis was done by Małgorzata Oleś, Sascha Dietrich, Junyan Lu, Britta Velten, Andreas Mock, Vladislav Kim and Wolfgang Huber.

This vignette was put together by Małgorzata Oleś.


This vignette is build from the sub-vignettes, which each can be build separately. The parts are separated by the horizontal lines. Each part finishes with removal of all the created objects.

library("AnnotationDbi")
library("abind")
library("beeswarm")
library("Biobase")
library("biomaRt")
library("broom")
library("colorspace")
library("cowplot")
library("dendsort")
library("DESeq2")
library("doParallel")
library("dplyr")
library("foreach")
library("forestplot")
library("genefilter")
library("ggbeeswarm")
library("ggdendro")
library("ggplot2")
#library("ggtern")
library("glmnet")
library("grid")
library("gridExtra")
library("gtable")
library("hexbin")
library("IHW")
library("ipflasso")
library("knitr")
library("limma")
library("magrittr")
library("maxstat")
library("nat")
library("org.Hs.eg.db")
library("BloodCancerMultiOmics2017")
library("pheatmap")
library("piano")
library("readxl")
library("RColorBrewer")
library("reshape2")
library("Rtsne")
library("scales")
library("SummarizedExperiment")
library("survival")
library("tibble")
library("tidyr")
library("tidyverse")
library("xtable")

options(stringsAsFactors=FALSE)

2 Characteristics of drugs and patients in the study

Loading the data.

data("drpar", "drugs", "patmeta", "mutCOM")

Creating vectors of patient samples and drugs within the drug screen. Within drugs, we omit the statistics for one drug combination, due to lack of possibility to assign its targets.

# PATIENTS
patM = colnames(drpar)

# DRUGS
drM = rownames(drpar)
drM = drM[!drM %in% "D_CHK"] # remove combintation of 2 drugs: D_CHK

General plotting parameters.

bwScale = c("0"="white","1"="black","N.A."="grey90")
lfsize = 16 # legend font size

2.1 Drugs

Categorize the drugs.

drugs$target_category = as.character(drugs$target_category)
drugs$group = NA
drugs$group[which(drugs$approved_042016==1)] = "FDA approved"
drugs$group[which(drugs$devel_042016==1)] = "clinical development/\ntool compound"

Show the characteristics.

                         FDA approved   clinical development/
tool compound
DNA damage response 4 12
EGFR 3 0
Other 3 4
ALK 1 1
Angiogenesis 1 0
Apoptosis (BH3, Survivin) 1 2
B-cell receptor 1 5
BCR/ABL 1 0
Epigenome 1 2
Hedgehog signaling 1 0
Immune modulation 1 0
JAK/STAT 1 2
MAPK 1 7
PI3K/AKT 1 5
Proteasome 1 1
mTOR 1 1
Cell cycle control 0 6
Cytoskeleton 0 2
HSP90 0 1
MET 0 1
Mitochondrial metabolism 0 2
NFkB 0 2
NOTCH 0 1
PIM 0 1
PKC 0 3
Reactive oxygen species 0 3
Splicing 0 1
TGF 0 1
WNT 0 1

2.2 Patient samples

Show number of samples stratified by the diagnosis.

Var1 Freq
CLL 184
T-PLL 25
MCL 10
MZL 6
AML 5
LPL 4
B-PLL 3
HCL 3
hMNC 3
HCL-V 2
Sezary 2
FL 1
PTCL-NOS 1

Within CLL group, we now show mutations with occurred in at least 4 samples.

# select CLL samples
patM = patM[patmeta[patM,"Diagnosis"]=="CLL"]

ighv = factor(setNames(patmeta[patM,"IGHV"], nm=patM), levels=c("U","M"))

mut1 = c("del17p13", "del11q22.3", "trisomy12", "del13q14_any")
mut2 = c("TP53", "ATM", "SF3B1", "NOTCH1", "MYD88")

mc = assayData(mutCOM)$binary[patM,]

## SELECTION OF MUTATIONS
# # include mutations with at least incidence of 4
mut2plot = names(which(sort(colSums(mc, na.rm=TRUE), decreasing=TRUE)>3))

# remove chromothrypsis
mut2plot = mut2plot[-grep("Chromothripsis", mut2plot)]
# divide mutations into gene mut and cnv
mut2plotSV = mut2plot[grep("[[:lower:]]", mut2plot)]
mut2plotSP = mut2plot[grep("[[:upper:]]", mut2plot)]

# remove some other things (it is quite manual thing, so be careful)
# IF YOU WANT TO REMOVE SOME MORE MUTATIONS JUST ADD THE LINES HERE!
mut2plotSV = mut2plotSV[-grep("del13q14_mono", mut2plotSV)]
mut2plotSV = mut2plotSV[-grep("del13q14_bi", mut2plotSV)]
mut2plotSV = mut2plotSV[-grep("del14q24.3", mut2plotSV)]

# rearrange the top ones to match the order in mut1 and mut2
mut2plotSV = c(mut1, mut2plotSV[!mut2plotSV %in% mut1])
mut2plotSP = c(mut2, mut2plotSP[!mut2plotSP %in% mut2])

factors = data.frame(assayData(mutCOM)$binary[patM, c(mut2plotSV, mut2plotSP)],
                     check.names=FALSE)
# change del13q14_any to del13q14
colnames(factors)[which(colnames(factors)=="del13q14_any")] = "del13q14"
mut2plotSV = gsub("del13q14_any", "del13q14", mut2plotSV)
# change it to factors
for(i in 1:ncol(factors)) {
  factors[,i] = factor(factors[,i], levels=c(1,0))
}

ord = order(factors[,1], factors[,2], factors[,3], factors[,4], factors[,5],
            factors[,6], factors[,7], factors[,8], factors[,9], factors[,10],
            factors[,11], factors[,12], factors[,13], factors[,14],
            factors[,15], factors[,16], factors[,17], factors[,18],
            factors[,19], factors[,20], factors[,21], factors[,22],
            factors[,23], factors[,24], factors[,25], factors[,26],
            factors[,27], factors[,28], factors[,29], factors[,30],
            factors[,31], factors[,32])

factorsord = factors[ord,]
patM = patM[ord]

(c(mut2plotSV, mut2plotSP))
##  [1] "del17p13"   "del11q22.3" "trisomy12"  "del13q14"   "del8p12"   
##  [6] "gain2p25.3" "gain8q24"   "del6q21"    "gain3q26"   "del9p21.3" 
## [11] "del15q15.1" "del6p21.2"  "TP53"       "ATM"        "SF3B1"     
## [16] "NOTCH1"     "MYD88"      "BRAF"       "KRAS"       "EGR2"      
## [21] "MED12"      "PCLO"       "MGA"        "ACTN2"      "BIRC3"     
## [26] "CPS1"       "FLRT2"      "KLHL6"      "NFKBIE"     "RYR2"      
## [31] "XPO1"       "ZC3H18"

Let’s now look deeper and for each mutation. We ask how many samples have (1) or don’t have (0) a particular mutation.

plotDF = meltWholeDF(factorsord)
plotDF$Mut =
  ifelse(sapply(plotDF$X,
                function(x) grep(x, list(mut2plotSV, mut2plotSP)))==1,"SV","SP")
plotDF$Status = "N.A."
plotDF$Status[plotDF$Measure==1 & plotDF$Mut=="SV"] = "1a"
plotDF$Status[plotDF$Measure==1 & plotDF$Mut=="SP"] = "1b"
plotDF$Status[plotDF$Measure==0] = "0"
plotDF$Status = factor(plotDF$Status, levels=c("1a","1b","0","N.A."))

plotDF$Y = factor(plotDF$Y, levels=patM)
plotDF$X = factor(plotDF$X, levels=rev(colnames(factorsord)))

mutPL = ggplotGrob(
  ggplot(data=plotDF, aes(x=Y, y=X, fill=Status)) + geom_tile() +
    scale_fill_manual(
      values=c("0"="white","1a"="forestgreen","1b"="navy","N.A."="grey90"),
      name="Mutation", labels=c("CNV","Gene mutation","WT","NA")) +
    ylab("") + xlab("") +
    geom_vline(xintercept=seq(0.5,length(patM)+1,5), colour="grey60") +
    geom_hline(yintercept=seq(0.5,ncol(factorsord)+1,1), colour="grey60") +
    scale_y_discrete(expand=c(0,0)) + scale_x_discrete(expand=c(0,0)) +
    theme(axis.ticks=element_blank(), axis.text.x=element_blank(),
          axis.text.y=element_text(
            size=60, face=ifelse(levels(plotDF$X) %in% mut2plotSV,
                                 "plain","italic")),
          axis.text=element_text(margin=unit(0.5,"cm"), colour="black"),
          legend.key = element_rect(colour = "black"),
          legend.text=element_text(size=lfsize),
          legend.title=element_text(size=lfsize)))
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
res = table(plotDF[,c("X","Measure")])
knitr::kable(res[order(res[,2], decreasing=TRUE),])
0 1
del13q14 69 106
TP53 142 36
del11q22.3 146 28
trisomy12 146 27
del17p13 150 27
SF3B1 152 26
NOTCH1 160 19
ATM 89 11
BRAF 169 10
KRAS 136 8
del8p12 131 8
gain8q24 162 7
gain2p25.3 132 7
PCLO 94 6
MED12 94 6
EGR2 94 6
del6q21 163 6
MGA 95 5
MYD88 173 5
del15q15.1 134 5
del9p21.3 134 5
gain3q26 134 5
ZC3H18 96 4
XPO1 96 4
RYR2 96 4
NFKBIE 96 4
KLHL6 96 4
FLRT2 96 4
CPS1 96 4
BIRC3 96 4
ACTN2 96 4
del6p21.2 135 4

In the last part, we characterize samples according to metadata categories.

Age

ageDF = data.frame(Factor="Age",
                   PatientID=factor(patM, levels=patM),
                   Value=patmeta[patM,c("Age4Main")])

agePL = ggplotGrob(
  ggplot(ageDF, aes(x=PatientID, y=Factor, fill=Value)) + geom_tile() +
    scale_fill_gradient(low = "gold", high = "#3D1F00", na.value="grey92",
                        name="Age", breaks=c(40,60,80)) +
    theme(axis.ticks=element_blank(),
          axis.text=element_text(size=60, colour="black",
                                 margin=unit(0.5,"cm")),
          legend.text=element_text(size=lfsize),
          legend.title=element_text(size=lfsize)))
  
hist(ageDF$Value, col="slategrey", xlab="Age", main="")

Sex

sexDF = data.frame(Factor="Sex", PatientID=factor(patM, levels=patM),
                   Value=patmeta[patM, "Gender"])

sexPL = ggplotGrob(
  ggplot(sexDF, aes(x=PatientID, y=Factor, fill=Value)) + geom_tile() +
    scale_fill_manual(values=c("f"="maroon","m"="royalblue4","N.A."="grey90"),
                      name="Sex", labels=c("Female","Male","NA")) +
    theme(axis.ticks=element_blank(),
          axis.text=element_text(size=60, colour="black",
                                 margin=unit(0.5,"cm")),
          legend.key = element_rect(colour = "black"),
          legend.text=element_text(size=lfsize),
          legend.title=element_text(size=lfsize)))

table(sexDF$Value)
## 
##   f   m 
##  76 108

Treatment

Number of samples treated (1) or not treated (0) before sampling.

treatDF = data.frame(Factor="Treated", PatientID=factor(patM, levels=patM),
                     Value=ifelse(patmeta[patM, "IC50beforeTreatment"], 0, 1))
treatDF$Value[is.na(treatDF$Value)] = "N.A."
treatDF$Value = factor(treatDF$Value, levels=c("0","1","N.A."))

treatPL = ggplotGrob(
  ggplot(treatDF, aes(x=PatientID, y=Factor, fill=Value)) +geom_tile() +
    scale_fill_manual(values=bwScale, name="Treated",
                      labels=c("0"="No","1"="Yes","N.A."="NA")) +
    theme(axis.ticks=element_blank(),
          axis.text=element_text(size=60, colour="black",
                                 margin=unit(0.5,"cm")),
          legend.key = element_rect(colour = "black"),
          legend.text=element_text(size=lfsize),
          legend.title=element_text(size=lfsize)))

table(treatDF$Value)
## 
##    0    1 N.A. 
##  131   52    1

IGHV status

Number of samples with (1) and without (0) the IGHV mutation.

ighvDF = data.frame(Factor="IGHV", PatientID=factor(patM, levels=patM),
                    Value=patmeta[patM, "IGHV"])
ighvDF$Value = ifelse(ighvDF$Value=="M", 1, 0)
ighvDF$Value[is.na(ighvDF$Value)] = "N.A."
ighvDF$Value = factor(ighvDF$Value, levels=c("0","1","N.A."))

ighvPL = ggplotGrob(
  ggplot(ighvDF, aes(x=PatientID, y=Factor, fill=Value)) + geom_tile() +
    scale_fill_manual(values=bwScale, name="IGHV",
                      labels=c("0"="Unmutated","1"="Mutated","N.A."="NA")) +
    theme(axis.ticks=element_blank(), 
          axis.text=element_text(size=60, colour="black", margin=unit(0.5,"cm")),
          legend.key=element_rect(colour = "black"),
          legend.text=element_text(size=lfsize),
          legend.title=element_text(size=lfsize)))

table(ighvDF$Value)
## 
##    0    1 N.A. 
##   74   98   12