## ----options, results="hide", include=FALSE, cache=FALSE, message=FALSE---- knitr::opts_chunk$set(fig.align="center", cache=TRUE,error=FALSE, #stop on error fig.width=5, fig.height=5, autodep=TRUE, out.width="600px", out.height="600px", results="markup", echo=TRUE, eval=TRUE) #knitr::opts_knit$set(stop_on_error = 2L) #really make it stop #knitr::dep_auto() options(getClass.msg=FALSE) graphics:::par(pch = 16, las = 1) set.seed(12345) ## for reproducibility library(SingleCellExperiment) library(slingshot, quietly = TRUE) ## ----dataSetup_sim--------------------------------------------------------- # generate synthetic count data representing a single lineage means <- rbind( # non-DE genes matrix(rep(rep(c(0.1,0.5,1,2,3), each = 300),100), ncol = 300, byrow = TRUE), # early deactivation matrix(rep(exp(atan( ((300:1)-200)/50 )),50), ncol = 300, byrow = TRUE), # late deactivation matrix(rep(exp(atan( ((300:1)-100)/50 )),50), ncol = 300, byrow = TRUE), # early activation matrix(rep(exp(atan( ((1:300)-100)/50 )),50), ncol = 300, byrow = TRUE), # late activation matrix(rep(exp(atan( ((1:300)-200)/50 )),50), ncol = 300, byrow = TRUE), # transient matrix(rep(exp(atan( c((1:100)/33, rep(3,100), (100:1)/33) )),50), ncol = 300, byrow = TRUE) ) counts <- apply(means,2,function(cell_means){ total <- rnbinom(1, mu = 7500, size = 4) rmultinom(1, total, cell_means) }) rownames(counts) <- paste0('G',1:750) colnames(counts) <- paste0('c',1:300) sim <- SingleCellExperiment(assays = List(counts = counts)) ## ----data_sling------------------------------------------------------------ library(slingshot, quietly = FALSE) data("slingshotExample") dim(rd) # data representing cells in a reduced dimensional space length(cl) # vector of cluster labels ## ----genefilt-------------------------------------------------------------- # filter genes down to potential cell-type markers # at least M (15) reads in at least N (15) cells geneFilter <- apply(assays(sim)$counts,1,function(x){ sum(x >= 3) >= 10 }) sim <- sim[geneFilter, ] ## ----norm------------------------------------------------------------------ FQnorm <- function(counts){ rk <- apply(counts,2,rank,ties.method='min') counts.sort <- apply(counts,2,sort) refdist <- apply(counts.sort,1,median) norm <- apply(rk,2,function(r){ refdist[r] }) rownames(norm) <- rownames(counts) return(norm) } assays(sim)$norm <- FQnorm(assays(sim)$counts) ## ----pca, cache=TRUE------------------------------------------------------- pca <- prcomp(t(log1p(assays(sim)$norm)), scale. = FALSE) rd1 <- pca$x[,1:2] plot(rd1, col = rgb(0,0,0,.5), pch=16, asp = 1) ## ----dm, cache=TRUE-------------------------------------------------------- library(destiny, quietly = TRUE) dm <- DiffusionMap(t(log1p(assays(sim)$norm))) rd2 <- cbind(DC1 = dm$DC1, DC2 = dm$DC2) plot(rd2, col = rgb(0,0,0,.5), pch=16, asp = 1) ## ----add_RDs, cache=TRUE--------------------------------------------------- reducedDims(sim) <- SimpleList(PCA = rd1, DiffMap = rd2) ## ----clustering_mclust----------------------------------------------------- library(mclust, quietly = TRUE) cl1 <- Mclust(rd1)$classification colData(sim)$GMM <- cl1 library(RColorBrewer) plot(rd1, col = brewer.pal(9,"Set1")[cl1], pch=16, asp = 1) ## ----clustering------------------------------------------------------------ cl2 <- kmeans(rd1, centers = 4)$cluster colData(sim)$kmeans <- cl2 plot(rd1, col = brewer.pal(9,"Set1")[cl2], pch=16, asp = 1) ## ----sling_sce------------------------------------------------------------- sim <- slingshot(sim, clusterLabels = 'GMM', reducedDim = 'PCA') ## ----plot_curve_1---------------------------------------------------------- summary(sim$slingPseudotime_1) colors <- colorRampPalette(brewer.pal(11,'Spectral')[-6])(100) plotcol <- colors[cut(sim$slingPseudotime_1, breaks=100)] plot(reducedDims(sim)$PCA, col = plotcol, pch=16, asp = 1) lines(SlingshotDataSet(sim), lwd=2, col='black') ## ----plot_curve_2---------------------------------------------------------- plot(reducedDims(sim)$PCA, col = brewer.pal(9,'Set1')[sim$GMM], pch=16, asp = 1) lines(SlingshotDataSet(sim), lwd=2, type = 'lineages', col = 'black') ## ----fitgam, cache = TRUE-------------------------------------------------- require(gam) t <- sim$slingPseudotime_1 # for time, only look at the 100 most variable genes Y <- log1p(assays(sim)$norm) var100 <- names(sort(apply(Y,1,var),decreasing = TRUE))[1:100] Y <- Y[var100,] # fit a GAM with a loess term for pseudotime gam.pval <- apply(Y,1,function(z){ d <- data.frame(z=z, t=t) suppressWarnings({ tmp <- suppressWarnings(gam(z ~ lo(t), data=d)) }) p <- summary(tmp)[3][[1]][2,3] p }) ## ----heatmaps-------------------------------------------------------------- topgenes <- names(sort(gam.pval, decreasing = FALSE))[1:100] heatdata <- assays(sim)$norm[topgenes, order(t, na.last = NA)] heatclus <- sim$GMM[order(t, na.last = NA)] heatmap(log1p(heatdata), Colv = NA, ColSideColors = brewer.pal(9,"Set1")[heatclus]) ## ----sling_lines_unsup----------------------------------------------------- lin1 <- getLineages(rd, cl, start.clus = '1') lin1 plot(rd, col = brewer.pal(9,"Set1")[cl], asp = 1, pch = 16) lines(lin1, lwd = 3, col = 'black') ## ----lines_sup_end--------------------------------------------------------- lin2 <- getLineages(rd, cl, start.clus= '1', end.clus = '3') plot(rd, col = brewer.pal(9,"Set1")[cl], asp = 1, pch = 16) lines(lin2, lwd = 3, col = 'black', show.constraints = TRUE) ## ----curves---------------------------------------------------------------- crv1 <- getCurves(lin1) crv1 plot(rd, col = brewer.pal(9,"Set1")[cl], asp = 1, pch = 16) lines(crv1, lwd = 3, col = 'black') ## ----session--------------------------------------------------------------- sessionInfo()