## ----style, echo=FALSE, results='hide', message=FALSE, cache=FALSE--------- library(BiocStyle) library(knitr) opts_chunk$set(error=FALSE, message=FALSE, warning=FALSE, cache=TRUE) opts_chunk$set(fig.asp=1) ## ---- cache=FALSE, echo=FALSE, results="hide"------------------------------ simpleSingleCell:::.compile("reads") # 416B simpleSingleCell:::.compile("tenx") # PBMC ## -------------------------------------------------------------------------- library(SingleCellExperiment) sce.full.416b <- readRDS("416B_preQC.rds") library(scater) lost <- calcAverage(counts(sce.full.416b)[,!sce.full.416b$PassQC]) kept <- calcAverage(counts(sce.full.416b)[,sce.full.416b$PassQC]) ## ----discardplot416b, fig.cap="Average counts across all discarded and retained cells in the 416B dataset. Each point represents a gene, with spike-in and mitochondrial transcripts in red and blue respectively."---- # Avoid loss of points where either average is zero. capped.lost <- pmax(lost, min(lost[lost>0])) capped.kept <- pmax(kept, min(kept[kept>0])) plot(capped.lost, capped.kept, xlab="Average count (discarded)", ylab="Average count (retained)", log="xy", pch=16) is.spike <- isSpike(sce.full.416b) points(capped.lost[is.spike], capped.kept[is.spike], col="red", pch=16) is.mito <- rowData(sce.full.416b)$is_feature_control_Mt points(capped.lost[is.mito], capped.kept[is.mito], col="dodgerblue", pch=16) ## -------------------------------------------------------------------------- library(edgeR) coefs <- predFC(cbind(lost, kept), design=cbind(1, c(1, 0)))[,2] info <- data.frame(logFC=coefs, Lost=lost, Kept=kept, row.names=rownames(sce.full.416b)) head(info[order(info$logFC, decreasing=TRUE),], 20) ## -------------------------------------------------------------------------- sce.pbmc <- readRDS("pbmc_data.rds") wrong.keep <- sce.pbmc$total_counts >= 1000 lost <- calcAverage(counts(sce.pbmc)[,!wrong.keep]) kept <- calcAverage(counts(sce.pbmc)[,wrong.keep]) ## ----discardplotpbmc, fig.cap="Average counts across all discarded and retained cells in the PBMC dataset, after using a more stringent filter on the total UMI count. Each point represents a gene, with platelet-related genes highlighted in orange."---- # Avoid loss of points where either average is zero. capped.lost <- pmax(lost, min(lost[lost>0])) capped.kept <- pmax(kept, min(kept[kept>0])) plot(capped.lost, capped.kept, xlab="Average count (discarded)", ylab="Average count (retained)", log="xy", pch=16) platelet <- c("PF4", "PPBP", "SDPR") points(capped.lost[platelet], capped.kept[platelet], col="orange", pch=16) ## -------------------------------------------------------------------------- coefs <- predFC(cbind(lost, kept), design=cbind(1, c(1, 0)))[,2] info <- data.frame(logFC=coefs, Lost=lost, Kept=kept, row.names=rownames(sce.pbmc)) head(info[order(info$logFC, decreasing=TRUE),], 20) ## -------------------------------------------------------------------------- # Obtaining the dataset. library(scRNAseq) data(allen) # Setting up the data. sce.allen <- as(allen, "SingleCellExperiment") assayNames(sce.allen) <- "counts" isSpike(sce.allen, "ERCC") <- grep("ERCC", rownames(sce.allen)) # Computing the QC metrics and running PCA. library(scater) sce.allen <- calculateQCMetrics(sce.allen) sce.allen <- runPCA(sce.allen, use_coldata=TRUE, detect_outliers=TRUE) table(sce.allen$outlier) ## -------------------------------------------------------------------------- sessionInfo()