Introduction to the NanoStringRCCSet Class

David Henderson, Patrick Aboyoun, Nicole Ortogero, Zhi Yang, Jason Reeves, Kara Gorman, Rona Vitancol, Thomas Smith

2021-05-19

Introduction

The NanoStringRCCSet class (named NanoStringRccSet in code) inherits from Biobase’s ExpressionSet class. It was designed to encapsulate data and corresponding methods for NanoString RCC files generated from the NanoString nCounter platform.

Loading Packages

Loading the NanoStringNCTools package allows users access to the NanoStringRCCSet class.

library(NanoStringNCTools)

Load additional packages for vignette plotting.

library(ggthemes)
library(ggiraph)

Building a NanoStringRCCSet from .RCC files

# Locate the example nCounter data shipped with the NanoStringNCTools package.
datadir <- system.file(
  "extdata", "3D_Bio_Example_Data",
  package = "NanoStringNCTools"
)
# Gather the SKMEL .RCC files, the RLF file and the sample annotation CSV.
rcc_files <- list.files(datadir, pattern = "SKMEL.*\\.RCC$", full.names = TRUE)
rlf_file <- file.path(datadir, "3D_SolidTumor_Sig.rlf")
sample_annotation <- file.path(datadir, "3D_SolidTumor_PhenoData.csv")
# Read all inputs into a single NanoStringRccSet object.
demoData <- readNanoStringRccSet(
  rcc_files,
  rlfFile = rlf_file,
  phenoDataFile = sample_annotation
)
# Check the class of the result and that it is an S4 ExpressionSet subclass.
class(demoData)
#> [1] "NanoStringRccSet"
#> attr(,"package")
#> [1] "NanoStringNCTools"
isS4(demoData)
#> [1] TRUE
is(demoData, "ExpressionSet")
#> [1] TRUE
# Print the object to see its assay, sample and feature components.
demoData
#> NanoStringRccSet (storageMode: lockedEnvironment)
#> assayData: 397 features, 12 samples 
#>   element names: exprs 
#> protocolData
#>   sampleNames: SKMEL2-DMSO-8h-R1_04.RCC SKMEL2-DMSO-8h-R2_04.RCC ...
#>     SKMEL28-VEM-8h-R3_10.RCC (12 total)
#>   varLabels: FileVersion SoftwareVersion ... CartridgeBarcode (17
#>     total)
#>   varMetadata: labelDescription
#> phenoData
#>   sampleNames: SKMEL2-DMSO-8h-R1_04.RCC SKMEL2-DMSO-8h-R2_04.RCC ...
#>     SKMEL28-VEM-8h-R3_10.RCC (12 total)
#>   varLabels: Treatment BRAFGenotype
#>   varMetadata: labelDescription
#> featureData
#>   featureNames: Endogenous_TP53_NM_000546.2
#>     Endogenous_IL22RA2_NM_181310.1 ... SNV_REF_PIK3CA Ref (exon
#>     10)|hg19|+|chr3:178936060-178936141_nRef_00032.1 (397 total)
#>   fvarLabels: CodeClass GeneName ... BarcodeComments (10 total)
#>   fvarMetadata: labelDescription
#> experimentData: use 'experimentData(object)'
#> Annotation: 3D_SolidTumor_Sig 
#> signature: none

Accessing and Assigning NanoStringRCCSet Data Members

Alongside the accessors associated with the ExpressionSet class, NanoStringRCCSet objects have unique additional assignment and accessor methods facilitating common ways to view nCounter data and associated labels.

# Sample annotations read from the phenoData file (first two samples).
head(pData(demoData), 2)
#>                          Treatment BRAFGenotype
#> SKMEL2-DMSO-8h-R1_04.RCC      DMSO        wt/wt
#> SKMEL2-DMSO-8h-R2_04.RCC      DMSO        wt/wt
# Protocol metadata stored as an AnnotatedDataFrame.
protocolData(demoData)
#> An object of class 'AnnotatedDataFrame'
#>   sampleNames: SKMEL2-DMSO-8h-R1_04.RCC SKMEL2-DMSO-8h-R2_04.RCC ...
#>     SKMEL28-VEM-8h-R3_10.RCC (12 total)
#>   varLabels: FileVersion SoftwareVersion ... CartridgeBarcode (17
#>     total)
#>   varMetadata: labelDescription
# svarLabels lists the phenoData and protocolData variable names together.
svarLabels(demoData)
#>  [1] "Treatment"        "BRAFGenotype"     "FileVersion"      "SoftwareVersion" 
#>  [5] "SystemType"       "SampleID"         "SampleOwner"      "SampleComments"  
#>  [9] "SampleDate"       "SystemAPF"        "AssayType"        "LaneID"          
#> [13] "FovCount"         "FovCounted"       "ScannerID"        "StagePosition"   
#> [17] "BindingDensity"   "CartridgeID"      "CartridgeBarcode"
# sData returns those same variables as one table (first two samples).
head(sData(demoData), 2)
#>                          Treatment BRAFGenotype FileVersion SoftwareVersion
#> SKMEL2-DMSO-8h-R1_04.RCC      DMSO        wt/wt         1.7         4.0.0.3
#> SKMEL2-DMSO-8h-R2_04.RCC      DMSO        wt/wt         1.7         4.0.0.3
#>                          SystemType        SampleID SampleOwner  SampleComments
#> SKMEL2-DMSO-8h-R1_04.RCC       Gen2 SKMEL2-DMSO-8hr             DNA-RNA-Protein
#> SKMEL2-DMSO-8h-R2_04.RCC       Gen2 SKMEL2-DMSO-8hr             DNA-RNA-Protein
#>                          SampleDate SystemAPF AssayType LaneID FovCount
#> SKMEL2-DMSO-8h-R1_04.RCC 2017-01-13   n6_vDV1      <NA>      4      280
#> SKMEL2-DMSO-8h-R2_04.RCC 2017-02-07   n6_vDV1      <NA>      4      280
#>                          FovCounted ScannerID StagePosition BindingDensity
#> SKMEL2-DMSO-8h-R1_04.RCC        268 1207C0049             3           0.80
#> SKMEL2-DMSO-8h-R2_04.RCC        280 1207C0049             6           1.03
#>                                     CartridgeID CartridgeBarcode
#> SKMEL2-DMSO-8h-R1_04.RCC   AGBT-3DBio-C1-SKMEL2                 
#> SKMEL2-DMSO-8h-R2_04.RCC AGBT-3DBio-3-C1-SKMEL2

Design information can be assigned to the NanoStringRCCSet object, as well as feature and sample labels to use for NanoStringRCCSet plotting methods.

# Store the experimental design as a one-sided formula, then read it back.
design(demoData) <- ~Treatment
design(demoData)
#> ~Treatment

# Show the current feature/sample labels, add a "Sample ID" protocol variable
# holding the sample names, and use it as the sample label.
dimLabels(demoData)
#> [1] "GeneName" "SampleID"
protocolData(demoData)[["Sample ID"]] <- sampleNames(demoData)
dimLabels(demoData)[2] <- "Sample ID"
dimLabels(demoData)
#> [1] "GeneName"  "Sample ID"

Summarizing NanoString nCounter Data

Easily summarize count results using the summary method. Data summaries can be generated across features or samples. Labels can be used to generate summaries based on feature or sample groupings.

# Summary statistics per feature (MARGIN = 1), first two features.
head(summary(demoData, MARGIN = 1), 2)
#>                                  GeomMean SizeFactor MeanLog2    SDLog2 Min
#> Endogenous_TP53_NM_000546.2    547.134439 5.88657439 9.095752 0.4357479 270
#> Endogenous_IL22RA2_NM_181310.1   4.842601 0.05210115 2.275782 1.1326693   1
#>                                    Q1 Median     Q3 Max
#> Endogenous_TP53_NM_000546.2    519.00    602 611.25 810
#> Endogenous_IL22RA2_NM_181310.1   2.75      6   9.25  11
# Summary statistics per sample (MARGIN = 2), first two samples.
head(summary(demoData, MARGIN = 2), 2)
#>                           GeomMean SizeFactor MeanLog2   SDLog2 Min Q1 Median
#> SKMEL2-DMSO-8h-R1_04.RCC  81.91473  0.8813138 6.356051 3.548682   0 11     42
#> SKMEL2-DMSO-8h-R2_04.RCC 111.61733  1.2008817 6.802417 3.571600   2 13     54
#>                           Q3   Max
#> SKMEL2-DMSO-8h-R1_04.RCC 525 30467
#> SKMEL2-DMSO-8h-R2_04.RCC 786 79362
# Treatment levels available for grouped summaries.
unique(sData(demoData)$Treatment)
#> [1] "DMSO" "VEM"
# Grouped summaries return one table per treatment level; show VEM, then DMSO.
head(summary(demoData, MARGIN = 2, GROUP = "Treatment")$VEM, 2)
#>                         GeomMean SizeFactor MeanLog2   SDLog2 Min Q1 Median
#> SKMEL2-VEM-8h-R1_10.RCC 105.0406   1.113396 6.714804 3.543809   0 14     52
#> SKMEL2-VEM-8h-R2_10.RCC 105.2725   1.115854 6.717985 3.652488   1 11     56
#>                           Q3   Max
#> SKMEL2-VEM-8h-R1_10.RCC  789 40330
#> SKMEL2-VEM-8h-R2_10.RCC 1004 44614
head(summary(demoData, MARGIN = 2, GROUP = "Treatment")$DMSO, 2)
#>                           GeomMean SizeFactor MeanLog2   SDLog2 Min Q1 Median
#> SKMEL2-DMSO-8h-R1_04.RCC  81.91473  0.8945544 6.356051 3.548682   0 11     42
#> SKMEL2-DMSO-8h-R2_04.RCC 111.61733  1.2189233 6.802417 3.571600   2 13     54
#>                           Q3   Max
#> SKMEL2-DMSO-8h-R1_04.RCC 525 30467
#> SKMEL2-DMSO-8h-R2_04.RCC 786 79362
# With log2 = FALSE the summary reports Mean/SD/Skewness/Kurtosis instead.
head(summary(demoData, MARGIN = 2, GROUP = "Treatment", log2 = FALSE)$DMSO, 2)
#>                              Mean       SD Skewness Kurtosis Min Q1 Median  Q3
#> SKMEL2-DMSO-8h-R1_04.RCC 1335.766 3763.740 4.327986 21.46799   0 11     42 525
#> SKMEL2-DMSO-8h-R2_04.RCC 1828.778 6226.835 7.636919 75.77340   2 13     54 786
#>                            Max
#> SKMEL2-DMSO-8h-R1_04.RCC 30467
#> SKMEL2-DMSO-8h-R2_04.RCC 79362

Subsetting NanoStringRCCSet Objects

Common subsetting methods including those to separate endogenous features from controls are provided with NanoStringRCCSet objects. In addition, users can use the subset or select arguments to further subset by feature or sample, respectively.

# Sample count before and after keeping only the VEM-treated samples.
length(sampleNames(demoData))
#> [1] 12
length(sampleNames(
  subset(demoData, select = phenoData(demoData)[["Treatment"]] == "VEM")
))
#> [1] 6

# Dimensions of the full assay matrix versus endogenous features in VEM samples.
dim(exprs(demoData))
#> [1] 397  12
dim(exprs(
  endogenousSubset(demoData, select = phenoData(demoData)[["Treatment"]] == "VEM")
))
#> [1] 180   6

# Tabulate the CodeClass values retained by each built-in subsetting method.
with(housekeepingSubset(demoData), table(CodeClass))
#> CodeClass
#> Housekeeping 
#>           12
with(negativeControlSubset(demoData), table(CodeClass))
#> CodeClass
#> Negative 
#>        6
with(positiveControlSubset(demoData), table(CodeClass))
#> CodeClass
#> Positive 
#>        6
with(controlSubset(demoData), table(CodeClass))
#> CodeClass
#>      Housekeeping          Negative PROTEIN_CELL_NORM       PROTEIN_NEG 
#>                12                 6                 1                 2 
#>          Positive     SNV_INPUT_CTL           SNV_NEG       SNV_PCR_CTL 
#>                 6                 3                 6                 3 
#>           SNV_POS       SNV_UDG_CTL 
#>                 6                 2
with(nonControlSubset(demoData), table(CodeClass))
#> CodeClass
#> Endogenous    PROTEIN    SNV_REF    SNV_VAR 
#>        180         26         40        104

Apply Functions Across Assay Data

Similar to ExpressionSet’s esApply function, the equivalent assayDataApply method is available for NanoStringRCCSet objects. Functions can be applied to assay data feature- or sample-wise, and esBy applies a function separately to each group of samples.

# Store the base-10 log of the raw counts as a new assay element, "demoElem".
assayDataElement(demoData, "demoElem") <-
  assayDataApply(demoData, MARGIN = 2, FUN = log, base = 10, elt = "exprs")
assayDataElement(demoData, "demoElem")[1:3, 1:2]
#>                                SKMEL2-DMSO-8h-R1_04.RCC
#> Endogenous_TP53_NM_000546.2                    2.540329
#> Endogenous_IL22RA2_NM_181310.1                 0.698970
#> Endogenous_IL2_NM_000586.2                     0.301030
#>                                SKMEL2-DMSO-8h-R2_04.RCC
#> Endogenous_TP53_NM_000546.2                   2.7788745
#> Endogenous_IL22RA2_NM_181310.1                0.9542425
#> Endogenous_IL2_NM_000586.2                    0.9030900
# Per-feature mean of the logged values; a zero count logs to -Inf, which
# carries through to the mean (see CCR5 below).
assayDataApply(demoData, MARGIN = 1, FUN = mean, elt = "demoElem")[1:5]
#>    Endogenous_TP53_NM_000546.2 Endogenous_IL22RA2_NM_181310.1 
#>                      2.7380941                      0.6850787 
#>     Endogenous_IL2_NM_000586.2    Endogenous_CCR5_NM_000579.1 
#>                      0.5595554                           -Inf 
#> Endogenous_PRLR_NM_001204318.1 
#>                      0.9411847

# The same per-feature mean, computed separately within each Treatment group.
head(esBy(
  demoData,
  GROUP = "Treatment",
  FUN = function(x) assayDataApply(x, MARGIN = 1, FUN = mean, elt = "demoElem")
))
#>                                     DMSO       VEM
#> Endogenous_TP53_NM_000546.2    2.6962710 2.7799171
#> Endogenous_IL22RA2_NM_181310.1 0.5927171 0.7774403
#> Endogenous_IL2_NM_000586.2     0.4808935 0.6382173
#> Endogenous_CCR5_NM_000579.1         -Inf 0.7821118
#> Endogenous_PRLR_NM_001204318.1 1.0082754 0.8740940
#> Endogenous_LIF_NM_002309.3     1.8067671 1.4661876

There is also a preloaded nCounter normalization method that comes with the NanoStringRCCSet class. This includes the default normalization performed in nSolver.

# Apply the "nSolver" normalization to the raw counts and keep the result in a
# separate assay element, "exprs_norm", leaving "exprs" untouched.
demoData <- normalize(
  demoData,
  type = "nSolver",
  fromELT = "exprs",
  toELT = "exprs_norm"
)
assayDataElement(demoData, elt = "exprs_norm")[1:3, 1:2]
#>                                SKMEL2-DMSO-8h-R1_04.RCC
#> Endogenous_TP53_NM_000546.2                    8.753132
#> Endogenous_IL22RA2_NM_181310.1                 2.895151
#> Endogenous_IL2_NM_000586.2                     1.895151
#>                                SKMEL2-DMSO-8h-R2_04.RCC
#> Endogenous_TP53_NM_000546.2                    8.983110
#> Endogenous_IL22RA2_NM_181310.1                 3.071419
#> Endogenous_IL2_NM_000586.2                     2.919416

Transforming NanoStringRCCSet Data to Data Frames

The NanoStringRCCSet munge function helps users generate data frames for downstream modeling and visualization. There is also a transform method, which functions similarly to the base transform function.

# Subsetting to the negative controls still yields a NanoStringRccSet.
neg_set <- negativeControlSubset(demoData)
class(neg_set)
#> [1] "NanoStringRccSet"
#> attr(,"package")
#> [1] "NanoStringNCTools"
# munge flattens the set into a data.frame with one row per feature/sample pair.
neg_ctrls <- munge(neg_set)
head(neg_ctrls, 2)
#>                      FeatureName               SampleName exprs Treatment
#> 1 Negative_NEG_C(0)_ERCC_00019.1 SKMEL2-DMSO-8h-R1_04.RCC     9      DMSO
#> 2 Negative_NEG_D(0)_ERCC_00076.1 SKMEL2-DMSO-8h-R1_04.RCC     9      DMSO
class(neg_ctrls)
#> [1] "data.frame"
head(munge(demoData), 2)
#>                      FeatureName               SampleName exprs Treatment
#> 1    Endogenous_TP53_NM_000546.2 SKMEL2-DMSO-8h-R1_04.RCC   347      DMSO
#> 2 Endogenous_IL22RA2_NM_181310.1 SKMEL2-DMSO-8h-R1_04.RCC     5      DMSO
# A mapping formula selects the columns; here one row per sample is returned.
munge(demoData, mapping = ~ BRAFGenotype + GeneMatrix)
#> DataFrame with 12 rows and 2 columns
#>                           BRAFGenotype   GeneMatrix
#>                            <character>     <matrix>
#> SKMEL2-DMSO-8h-R1_04.RCC         wt/wt  347:5:2:...
#> SKMEL2-DMSO-8h-R2_04.RCC         wt/wt  601:9:8:...
#> SKMEL2-DMSO-8h-R3_04.RCC         wt/wt  270:2:1:...
#> SKMEL2-VEM-8h-R1_10.RCC          wt/wt 588:10:5:...
#> SKMEL2-VEM-8h-R2_10.RCC          wt/wt  810:3:5:...
#> ...                                ...          ...
#> SKMEL28-DMSO-8h-R2_04.RCC      mut/mut  722:5:3:...
#> SKMEL28-DMSO-8h-R3_04.RCC      mut/mut  608:1:4:...
#> SKMEL28-VEM-8h-R1_10.RCC       mut/mut 603:11:6:...
#> SKMEL28-VEM-8h-R2_10.RCC       mut/mut 618:10:9:...
#> SKMEL28-VEM-8h-R3_10.RCC       mut/mut  519:7:1:...

# transform turns the normalized matrix into a data.frame; note that the
# column names come back with "-" replaced by "." (see the output below).
exprs_df <- transform(assayData(demoData)[["exprs_norm"]])
class(exprs_df)
#> [1] "data.frame"
exprs_df[1:3, 1:2]
#>                                SKMEL2.DMSO.8h.R1_04.RCC
#> Endogenous_TP53_NM_000546.2                    8.753132
#> Endogenous_IL22RA2_NM_181310.1                 2.895151
#> Endogenous_IL2_NM_000586.2                     1.895151
#>                                SKMEL2.DMSO.8h.R2_04.RCC
#> Endogenous_TP53_NM_000546.2                    8.983110
#> Endogenous_IL22RA2_NM_181310.1                 3.071419
#> Endogenous_IL2_NM_000586.2                     2.919416

Built-in Quality Control Assessment

Users can flag samples that fail QC thresholds or have borderline results based on housekeeper and ERCC expression, imaging quality, and binding density. Additionally, QC results can be visualized using the NanoStringRCCSet autoplot method.

# Compute QC flags; this adds "QCFlags" and "QCBorderlineFlags" to the
# sample-level variables, as the svarLabels output shows.
demoData <- setQCFlags(demoData)
tail(svarLabels(demoData))
#> [1] "BindingDensity"    "CartridgeID"       "CartridgeBarcode" 
#> [4] "Sample ID"         "QCFlags"           "QCBorderlineFlags"
# Each flag set holds one logical column per QC category (first two samples).
head(protocolData(demoData)[["QCFlags"]], 2)
#>                          Imaging Binding Linearity   LoD Housekeeping
#> SKMEL2-DMSO-8h-R1_04.RCC   FALSE   FALSE     FALSE FALSE        FALSE
#> SKMEL2-DMSO-8h-R2_04.RCC   FALSE   FALSE     FALSE FALSE        FALSE
head(protocolData(demoData)[["QCBorderlineFlags"]], 2)
#>                          Imaging Binding Linearity   LoD Housekeeping
#> SKMEL2-DMSO-8h-R1_04.RCC   FALSE   FALSE     FALSE FALSE        FALSE
#> SKMEL2-DMSO-8h-R2_04.RCC   FALSE   FALSE     FALSE FALSE        FALSE

Binding Density QC

# Use Arial as the base font family for the plots that follow.
theme_set(theme_gray(base_family = "Arial"))
# Render the two binding-density QC plots as interactive ggiraph widgets.
girafe(ggobj = autoplot(demoData, "bindingDensity-mean"))
#> Warning: Ignoring unknown parameters: tooltip_fill
girafe(ggobj = autoplot(demoData, "bindingDensity-sd"))
#> Warning: Ignoring unknown parameters: tooltip_fill

QC by Lane

# Render the two lane-level QC plots as interactive ggiraph widgets.
girafe(ggobj = autoplot(demoData, "lane-bindingDensity"))
#> Warning: Ignoring unknown parameters: tooltip_fill
girafe(ggobj = autoplot(demoData, "lane-fov"))
#> Warning: Ignoring unknown parameters: tooltip_fill

Housekeeping Genes QC

# Keep only the DMSO-treated samples, then plot the housekeeping-gene QC view.
subData <- subset(
  demoData,
  select = phenoData(demoData)[["Treatment"]] == "DMSO"
)
girafe(ggobj = autoplot(subData, "housekeep-geom"))
#> Warning: Ignoring unknown parameters: tooltip_fill

ERCC QC

# ERCC linearity plot for all samples.
girafe(ggobj = autoplot(demoData, "ercc-linearity"))
#> Warning: Ignoring unknown parameters: tooltip_fill

#> Warning: Ignoring unknown parameters: tooltip_fill
# ERCC limit-of-detection plot for the DMSO subset (subData).
girafe(ggobj = autoplot(subData, "ercc-lod"))
#> Warning: Ignoring unknown parameters: tooltip_fill
#> Warning: Transformation introduced infinite values in continuous y-axis
#> Warning: Transformation introduced infinite values in continuous y-axis
#> Warning: Removed 1 rows containing non-finite values (stat_boxplot).
#> Warning: Removed 1 rows containing non-finite values (stat_boxplot).

Data exploration

Further data exploration can be performed by visualizing a select feature’s expression or by getting a bird’s-eye view with expression heatmaps auto-generated with unsupervised clustering dendrograms.

# The two exploration plots below are left commented out in this vignette:
# a boxplot of one feature's raw counts ("exprs") and a gene heatmap of the
# normalized data ("exprs_norm").
#girafe( ggobj = autoplot( demoData , "boxplot-feature" , index = featureNames(demoData)[3] , elt = "exprs" ) )
#autoplot( demoData , "heatmap-genes" , elt = "exprs_norm" )
# Print the R version, platform and package versions used for this vignette.
sessionInfo()
#> R version 4.1.0 (2021-05-18)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Ubuntu 20.04.2 LTS
#> 
#> Matrix products: default
#> BLAS:   /home/biocbuild/bbs-3.13-bioc/R/lib/libRblas.so
#> LAPACK: /home/biocbuild/bbs-3.13-bioc/R/lib/libRlapack.so
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_GB              LC_COLLATE=C              
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> attached base packages:
#> [1] stats4    parallel  stats     graphics  grDevices utils     datasets 
#> [8] methods   base     
#> 
#> other attached packages:
#> [1] ggiraph_0.7.10          ggthemes_4.2.4          NanoStringNCTools_1.0.0
#> [4] ggplot2_3.3.3           S4Vectors_0.30.0        Biobase_2.52.0         
#> [7] BiocGenerics_0.38.0    
#> 
#> loaded via a namespace (and not attached):
#>  [1] beeswarm_0.3.1         tidyselect_1.1.1       xfun_0.23             
#>  [4] bslib_0.2.5.1          purrr_0.3.4            colorspace_2.0-1      
#>  [7] vctrs_0.3.8            generics_0.1.0         htmltools_0.5.1.1     
#> [10] yaml_2.2.1             utf8_1.2.1             rlang_0.4.11          
#> [13] jquerylib_0.1.4        pillar_1.6.1           glue_1.4.2            
#> [16] withr_2.4.2            DBI_1.1.1              RColorBrewer_1.1-2    
#> [19] uuid_0.1-4             GenomeInfoDbData_1.2.6 lifecycle_1.0.0       
#> [22] stringr_1.4.0          zlibbioc_1.38.0        Biostrings_2.60.0     
#> [25] munsell_0.5.0          gtable_0.3.0           htmlwidgets_1.5.3     
#> [28] evaluate_0.14          labeling_0.4.2         knitr_1.33            
#> [31] IRanges_2.26.0         GenomeInfoDb_1.28.0    vipor_0.4.5           
#> [34] fansi_0.4.2            Rcpp_1.0.6             scales_1.1.1          
#> [37] jsonlite_1.7.2         XVector_0.32.0         farver_2.1.0          
#> [40] systemfonts_1.0.2      digest_0.6.27          stringi_1.6.2         
#> [43] dplyr_1.0.6            grid_4.1.0             tools_4.1.0           
#> [46] bitops_1.0-7           magrittr_2.0.1         sass_0.4.0            
#> [49] RCurl_1.98-1.3         tibble_3.1.2           crayon_1.4.1          
#> [52] pkgconfig_2.0.3        pheatmap_1.0.12        ellipsis_0.3.2        
#> [55] ggbeeswarm_0.6.0       assertthat_0.2.1       rmarkdown_2.8         
#> [58] R6_2.5.0               compiler_4.1.0