## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 5, fig.height = 4, dpi=200 ) ## ---- message=FALSE, warning=FALSE-------------------------------------------- #devtools::install_github("Nanostring-Biostats/NanoStringNCTools", # force = TRUE, ref = "master") library(NanoStringNCTools) ## ---- message=FALSE, warning=FALSE-------------------------------------------- library(ggthemes) library(ggiraph) library(pheatmap) ## ----------------------------------------------------------------------------- # set your file location datadir <- system.file("extdata", "3D_Bio_Example_Data", package = "NanoStringNCTools") # read in RCC files rcc_files <- dir(datadir, pattern = "SKMEL.*\\.RCC$", full.names = TRUE) # read in RLF file rlf_file <- file.path(datadir, "3D_SolidTumor_Sig.rlf") # read in sample annotation sample_annotation <- file.path(datadir, "3D_SolidTumor_PhenoData.csv") # load all the input files into demoData (S4 object) demoData <- readNanoStringRccSet(rcc_files, rlfFile = rlf_file, phenoDataFile = sample_annotation) class( demoData ) isS4( demoData ) is( demoData, "ExpressionSet" ) demoData ## ----warning = FALSE---------------------------------------------------------- # access the first two rows of the count matrix head(assayData(demoData)[["exprs"]], 2) # access the first two rows of the pheno data head(pData(demoData), 2) # access the protocol data protocolData(demoData) # access the first three rows of the probe information fData(demoData)[1:3, ] # access the available pheno and protocol data variables svarLabels(demoData) head(sData(demoData), 2) # access RLF information annotation(demoData) ## ----------------------------------------------------------------------------- # assign design information design(demoData) <- ~ `Treatment` design(demoData) # check the column names to use as labels for the features and samples respectively dimLabels(demoData) # Change SampleID to Sample ID protocolData(demoData)[["Sample ID"]] <- sampleNames(demoData) dimLabels(demoData)[2] <- "Sample ID" dimLabels(demoData) ## ----------------------------------------------------------------------------- # summarize log2 counts for each feature head(summary(demoData, MARGIN = 1), 2) # summarize log2 counts for each sample head(summary(demoData, MARGIN = 2), 2) # check the unique levels in Treatment group unique(sData(demoData)$"Treatment") # summarize log2 counts for each VEM sample head(summary(demoData, MARGIN = 2, GROUP = "Treatment")$VEM, 2) # summarize log2 counts for each DMSO sample head(summary(demoData, MARGIN = 2, GROUP = "Treatment")$"DMSO", 2) # summarize counts for each DMSO sample, without log2 transformation head(summary(demoData, MARGIN = 2, GROUP = "Treatment", log2 = FALSE)$"DMSO", 2) ## ----warning = FALSE---------------------------------------------------------- # check the number of samples in the dataset length(sampleNames(demoData)) # check the number of VEM samples in the dataset length(sampleNames(subset(demoData, select = phenoData(demoData)[["Treatment"]] == "VEM"))) # check the dimension of the expression matrix dim(exprs(demoData)) # check the dimension of the expression matrix for VEM samples and endogenous genes only dim(exprs(endogenousSubset(demoData, select = phenoData(demoData)[["Treatment"]] == "VEM"))) # housekeepingSubset() only selects housekeeper genes with(housekeepingSubset(demoData), table(CodeClass)) # negativeControlSubset() only selects negative probes with(negativeControlSubset(demoData), table(CodeClass)) # positiveControlSubset() only selects positive probes with(positiveControlSubset(demoData), table(CodeClass)) # controlSubset() selects all control probes with(controlSubset(demoData), table(CodeClass)) # nonControlSubset() selects all non-control probes with(nonControlSubset(demoData), table(CodeClass)) ## ----------------------------------------------------------------------------- neg_set <- negativeControlSubset(demoData) class(neg_set) ## ----------------------------------------------------------------------------- # calculate the log counts of gene expressions for each sample assayDataElement(demoData, "demoElem") <- assayDataApply(demoData, MARGIN=2, FUN=log, base=10, elt="exprs") assayDataElement(demoData, "demoElem")[1:3, 1:2] # calculate the mean of log counts for each feature assayDataApply(demoData, MARGIN=1, FUN=mean, elt="demoElem")[1:5] # split data by Treatment and calculate the mean of log counts for each feature head(esBy(demoData, GROUP = "Treatment", FUN = function(x){ assayDataApply(x, MARGIN = 1, FUN=mean, elt="demoElem") })) ## ----warning = FALSE---------------------------------------------------------- demoData <- normalize(demoData, type="nSolver", fromELT = "exprs", toELT = "exprs_norm") assayDataElement(demoData, elt = "exprs_norm")[1:3, 1:2] ## ----warning = FALSE---------------------------------------------------------- autoplot(demoData, type = "heatmap-genes", elt = "exprs_norm", heatmapGroup = c("Treatment", "BRAFGenotype"), show_colnames_gene_limit = 10, show_rownames_gene_limit = 40, log2scale = FALSE) ## ----warning = FALSE---------------------------------------------------------- # combine negative probes and expressions together neg_ctrls <- munge(neg_set, ~exprs) head(neg_ctrls, 2) class(neg_ctrls) # combine expressions with all features head(munge(demoData, ~exprs), 2) # combine mapping with the dataset, store gene expressions as a matrix munge(demoData, mapping = ~`BRAFGenotype` + GeneMatrix) # transform the gene expressions to normalized counts exprs_df <- transform(assayData(demoData)[["exprs_norm"]]) class(exprs_df) exprs_df[1:3, 1:2] ## ----warning = FALSE---------------------------------------------------------- # Use the setSeqQCFlags function to set Sequencing QC Flags to your dataset. The default cutoff are displayed in the function. demoData <- setQCFlags(demoData, qcCutoffs = list(Housekeeper = c(failingCutoff = 32, passingCutoff = 100), Imaging = c(fovCutoff = 0.75), BindingDensity = c(minimumBD = 0.1, maximumBD = 2.25, maximumBDSprint = 1.8), ERCCLinearity = c(correlationValue = 0.95), ERCCLoD = c(standardDeviations = 2))) # show the last 6 column names in the data tail(svarLabels(demoData)) # show the first 2 rows of the QC Flags results head(protocolData(demoData)[["QCFlags"]], 2) # show the first 2 rows of the QC Borderline Flags results head(protocolData(demoData)[["QCBorderlineFlags"]], 2) ## ----warning = FALSE---------------------------------------------------------- # set the font Arial theme_set(theme_gray(base_family = "Arial")) girafe(ggobj = autoplot(demoData, type = "housekeep-geom")) ## ----warning = FALSE---------------------------------------------------------- subData <- subset(demoData, select = phenoData(demoData)[["Treatment"]] == "DMSO") girafe(ggobj = autoplot(subData, type = "housekeep-geom")) ## ----warning = FALSE---------------------------------------------------------- girafe(ggobj = autoplot(demoData, type = "lane-bindingDensity")) ## ----warning = FALSE---------------------------------------------------------- girafe(ggobj = autoplot(demoData, type = "lane-fov")) ## ----warning = FALSE---------------------------------------------------------- girafe(ggobj = autoplot(demoData, type = "ercc-linearity")) ## ----warning=FALSE------------------------------------------------------------ girafe(ggobj = autoplot(subData, type = "ercc-lod")) ## ----------------------------------------------------------------------------- girafe( ggobj = autoplot( demoData , "boxplot-feature" , index = featureNames(demoData)[3] , elt = "exprs" ) ) autoplot( demoData , "heatmap-genes" , elt = "exprs_norm" ) ## ----------------------------------------------------------------------------- sessionInfo()