## ----knitr-options, include = FALSE------------------------------------------- knitr::opts_chunk$set(collapse = TRUE, comment = "#>") ## ----setup-------------------------------------------------------------------- library("splatter") library("scater") library("ggplot2") ## ----splat-params------------------------------------------------------------- params <- newSplatParams() params ## ----nGenes------------------------------------------------------------------- # Set the number of genes to 1000 params <- setParam(params, "nGenes", 1000) sim <- splatSimulate(params, verbose = FALSE) dim(sim) ## ----batches------------------------------------------------------------------ # Simulation with two batches of 100 cells sim <- splatSimulate(params, batchCells = c(100, 100), verbose = FALSE) # PCA plot using scater sim <- logNormCounts(sim) sim <- runPCA(sim) plotPCA(sim, colour_by = "Batch") ## ----batch-factors------------------------------------------------------------ # Simulation with small batch effects sim1 <- splatSimulate(params, batchCells = c(100, 100), batch.facLoc = 0.001, batch.facScale = 0.001, verbose = FALSE) sim1 <- logNormCounts(sim1) sim1 <- runPCA(sim1) plotPCA(sim1, colour_by = "Batch") + ggtitle("Small batch effects") # Simulation with big batch effects sim2 <- splatSimulate(params, batchCells = c(100, 100), batch.facLoc = 0.5, batch.facScale = 0.5, verbose = FALSE) sim2 <- logNormCounts(sim2) sim2 <- runPCA(sim2) plotPCA(sim2, colour_by = "Batch") + ggtitle("Big batch effects") ## ----batch-rmEffect----------------------------------------------------------- sim1 <- splatSimulate(params, batchCells = c(100, 100), batch.rmEffect = FALSE, verbose = FALSE) sim1 <- logNormCounts(sim1) sim1 <- runPCA(sim1) plotPCA(sim1, colour_by = "Batch") + ggtitle("With batch effects") sim2 <- splatSimulate(params, batchCells = c(100, 100), batch.rmEffect = TRUE, verbose = FALSE) sim2 <- logNormCounts(sim2) sim2 <- runPCA(sim2) plotPCA(sim2, colour_by = "Batch") + ggtitle("Batch effects removed") ## ----outlier-prob------------------------------------------------------------- # Few outliers sim1 <- splatSimulate(out.prob = 0.001, verbose = FALSE) ggplot(as.data.frame(rowData(sim1)), aes(x = log10(GeneMean), fill = OutlierFactor != 1)) + geom_histogram(bins = 100) + ggtitle("Few outliers") # Lots of outliers sim2 <- splatSimulate(out.prob = 0.2, verbose = FALSE) ggplot(as.data.frame(rowData(sim2)), aes(x = log10(GeneMean), fill = OutlierFactor != 1)) + geom_histogram(bins = 100) + ggtitle("Lots of outliers") ## ----groups------------------------------------------------------------------- params.groups <- newSplatParams(batchCells = 500, nGenes = 1000) # One small group, one big group sim1 <- splatSimulateGroups(params.groups, group.prob = c(0.9, 0.1), verbose = FALSE) sim1 <- logNormCounts(sim1) sim1 <- runPCA(sim1) plotPCA(sim1, colour_by = "Group") + ggtitle("One small group, one big group") # Five groups sim2 <- splatSimulateGroups(params.groups, group.prob = c(0.2, 0.2, 0.2, 0.2, 0.2), verbose = FALSE) sim2 <- logNormCounts(sim2) sim2 <- runPCA(sim2) plotPCA(sim2, colour_by = "Group") + ggtitle("Five groups") ## ----de-prob------------------------------------------------------------------ # Few DE genes sim1 <- splatSimulateGroups(params.groups, group.prob = c(0.5, 0.5), de.prob = 0.01, verbose = FALSE) sim1 <- logNormCounts(sim1) sim1 <- runPCA(sim1) plotPCA(sim1, colour_by = "Group") + ggtitle("Few DE genes") # Lots of DE genes sim2 <- splatSimulateGroups(params.groups, group.prob = c(0.5, 0.5), de.prob = 0.3, verbose = FALSE) sim2 <- logNormCounts(sim2) sim2 <- runPCA(sim2) plotPCA(sim2, colour_by = "Group") + ggtitle("Lots of DE genes") ## ----de-factors--------------------------------------------------------------- # Small DE factors sim1 <- splatSimulateGroups(params.groups, group.prob = c(0.5, 0.5), de.facLoc = 0.01, verbose = FALSE) sim1 <- logNormCounts(sim1) sim1 <- runPCA(sim1) plotPCA(sim1, colour_by = "Group") + ggtitle("Small DE factors") # Big DE factors sim2 <- splatSimulateGroups(params.groups, group.prob = c(0.5, 0.5), de.facLoc = 0.3, verbose = FALSE) sim2 <- logNormCounts(sim2) sim2 <- runPCA(sim2) plotPCA(sim2, colour_by = "Group") + ggtitle("Big DE factors") ## ----complex-de--------------------------------------------------------------- # Different DE probs sim1 <- splatSimulateGroups(params.groups, group.prob = c(0.2, 0.2, 0.2, 0.2, 0.2), de.prob = c(0.01, 0.01, 0.1, 0.1, 0.3), verbose = FALSE) sim1 <- logNormCounts(sim1) sim1 <- runPCA(sim1) plotPCA(sim1, colour_by = "Group") + labs(title = "Different DE probabilities", caption = paste("Groups 1 and 2 have very few DE genes,", "Groups 3 and 4 have the default number,", "Group 5 has many DE genes")) # Different DE factors sim2 <- splatSimulateGroups(params.groups, group.prob = c(0.2, 0.2, 0.2, 0.2, 0.2), de.facLoc = c(0.01, 0.01, 0.1, 0.1, 0.2), de.facScale = c(0.2, 0.5, 0.2, 0.5, 0.4), verbose = FALSE) sim2 <- logNormCounts(sim2) sim2 <- runPCA(sim2) plotPCA(sim2, colour_by = "Group") + labs(title = "Different DE factors", caption = paste("Group 1 has factors with small location (value),", "and scale (variability),", "Group 2 has small location and greater scale.\n", "Groups 3 and 4 have greater location with small,", "and large scales", "Group 5 has bigger factors with moderate", "variability")) # Combination of everything sim3 <- splatSimulateGroups(params.groups, group.prob = c(0.05, 0.2, 0.2, 0.2, 0.35), de.prob = c(0.3, 0.1, 0.2, 0.01, 0.1), de.downProb = c(0.1, 0.4, 0.9, 0.6, 0.5), de.facLoc = c(0.6, 0.1, 0.1, 0.01, 0.2), de.facScale = c(0.1, 0.4, 0.2, 0.5, 0.4), verbose = FALSE) sim3 <- logNormCounts(sim3) sim3 <- runPCA(sim3) plotPCA(sim3, colour_by = "Group") + labs(title = "Different DE factors", caption = paste( "Group 1 is small with many very up-regulated DE genes,", "Group 2 has the default DE parameters,\n", "Group 3 has many down-regulated DE genes,", "Group 4 has very few DE genes,", "Group 5 is large with moderate DE factors") ) ## ----paths-------------------------------------------------------------------- # Linear paths sim1 <- splatSimulatePaths(params.groups, group.prob = c(0.25, 0.25, 0.25, 0.25), de.prob = 0.5, de.facLoc = 0.2, path.from = c(0, 1, 2, 3), verbose = FALSE) sim1 <- logNormCounts(sim1) sim1 <- runPCA(sim1) plotPCA(sim1, colour_by = "Group") + ggtitle("Linear paths") # Branching path sim2 <- splatSimulatePaths(params.groups, group.prob = c(0.25, 0.25, 0.25, 0.25), de.prob = 0.5, de.facLoc = 0.2, path.from = c(0, 1, 1, 3), verbose = FALSE) sim2 <- logNormCounts(sim2) sim2 <- runPCA(sim2) plotPCA(sim2, colour_by = "Group") + ggtitle("Branching path") ## ----paths-steps-------------------------------------------------------------- # Few steps sim1 <- splatSimulatePaths(params.groups, path.nSteps = 3, de.prob = 0.5, de.facLoc = 0.2, verbose = FALSE) sim1 <- logNormCounts(sim1) sim1 <- runPCA(sim1) plotPCA(sim1, colour_by = "Step") + ggtitle("Few steps") # Lots of steps sim2 <- splatSimulatePaths(params.groups, path.nSteps = 1000, de.prob = 0.5, de.facLoc = 0.2, verbose = FALSE) sim2 <- logNormCounts(sim2) sim2 <- runPCA(sim2) plotPCA(sim2, colour_by = "Step") + ggtitle("Lots of steps") ## ----paths-skew--------------------------------------------------------------- # Skew towards the end sim1 <- splatSimulatePaths(params.groups, path.skew = 0.1, de.prob = 0.5, de.facLoc = 0.2, verbose = FALSE) sim1 <- logNormCounts(sim1) sim1 <- runPCA(sim1) plotPCA(sim1, colour_by = "Step") + ggtitle("Skewed towards the end")