Overview

This tutorial demonstrates how to coerce GeoMxSet objects into Seurat or SpatialExperiment objects and the subsequent analyses. For more examples of what analyses are available in these objects, look at these Seurat or SpatialExperiment vignettes.

Data Processing

Data Processing should occur in GeomxTools. Due to the unique nature of the regions of interest (ROIs), it is recommended to use the preproccesing steps available in GeomxTools rather than the single-cell made preprocessing available in Seurat.

library(GeomxTools)
library(Seurat)
library(SpatialDecon)
library(patchwork)
datadir <- system.file("extdata", "DSP_NGS_Example_Data",
                       package="GeomxTools")
DCCFiles <- dir(datadir, pattern=".dcc$", full.names=TRUE)
PKCFiles <- unzip(zipfile = file.path(datadir,  "/pkcs.zip"))
SampleAnnotationFile <- file.path(datadir, "annotations.xlsx")

demoData <-
  suppressWarnings(readNanoStringGeoMxSet(dccFiles = DCCFiles,
                                          pkcFiles = PKCFiles,
                                          phenoDataFile = SampleAnnotationFile,
                                          phenoDataSheet = "CW005",
                                          phenoDataDccColName = "Sample_ID",
                                          protocolDataColNames = c("aoi",
                                                                   "cell_line",
                                                                   "roi_rep",
                                                                   "pool_rep",
                                                                   "slide_rep")))

After reading in the object, we will do a couple of QC steps.

  1. Shift all 0 counts by 1
  2. Flag low quality ROIs
  3. Flag low quality probes
  4. Remove low quality ROIs and probes
demoData <- shiftCountsOne(demoData, useDALogic=TRUE)
demoData <- setSegmentQCFlags(demoData, qcCutoffs = list(percentSaturation = 45))
demoData <- setBioProbeQCFlags(demoData)

# low sequenced ROIs
lowSaturation <- which(protocolData(demoData)[["QCFlags"]]$LowSaturation)

# probes that are considered outliers 
lowQCprobes <- which(featureData(demoData)[["QCFlags"]]$LowProbeRatio | 
                       featureData(demoData)[["QCFlags"]]$GlobalGrubbsOutlier)

# remove low quality ROIs and probes
passedQC <- demoData[-lowQCprobes, -lowSaturation]

dim(demoData)
## Features  Samples 
##     8707       88
dim(passedQC)
## Features  Samples 
##     8698       83

Objects must be aggregated to Target level data before coercing. This changes the row (gene) information to be the gene name rather than the probe ID.

featureType(passedQC)
## [1] "Probe"
data.frame(assayData(passedQC)[["exprs"]][seq_len(3), seq_len(3)])
DSP.1001250002642.A02.dcc DSP.1001250002642.A03.dcc DSP.1001250002642.A04.dcc
RTS0039454 294 239 6
RTS0039455 270 281 6
RTS0039456 255 238 3
target_demoData <- aggregateCounts(passedQC)

featureType(target_demoData)
## [1] "Target"
data.frame(assayData(target_demoData)[["exprs"]][seq_len(3), seq_len(3)])
DSP.1001250002642.A02.dcc DSP.1001250002642.A03.dcc DSP.1001250002642.A04.dcc
ACTA2 328.286182 323.490808 6.081111
FOXA2 4.919019 4.919019 6.942503
NANOG 2.954177 4.128918 8.359554

It is recommended to normalize using a GeoMx specific model before coercing. The normalized data is now in the assayData slot called “q_norm”.

norm_target_demoData <- normalize(target_demoData, norm_method="quant",
                                  desiredQuantile = .75, toElt = "q_norm")

assayDataElementNames(norm_target_demoData)
## [1] "exprs"  "q_norm"
data.frame(assayData(norm_target_demoData)[["q_norm"]][seq_len(3), seq_len(3)])
DSP.1001250002642.A02.dcc DSP.1001250002642.A03.dcc DSP.1001250002642.A04.dcc
ACTA2 349.571598 344.257297 3.968122
FOXA2 5.237958 5.234795 4.530208
NANOG 3.145720 4.393974 5.454880

Seurat

Seurat Coercion

The three errors that can occur when trying to coerce to Seurat are:

  1. object must be on the target level
  2. object should be normalized, if you want raw data you can set forceRaw to TRUE
  3. normalized count matrix name must be valid
as.Seurat(demoData)
## Error in as.Seurat.NanoStringGeoMxSet(demoData): Data must be on Target level before converting to a Seurat Object
as.Seurat(target_demoData, normData = "exprs")
## Error in as.Seurat.NanoStringGeoMxSet(target_demoData, normData = "exprs"): It is NOT recommended to use Seurat's normalization for GeoMx data. 
##              Normalize using GeomxTools::normalize() or set forceRaw to TRUE if you want to continue with Raw data
as.Seurat(norm_target_demoData, normData = "exprs_norm")
## Error in as.Seurat.NanoStringGeoMxSet(norm_target_demoData, normData = "exprs_norm"): The normData name "exprs_norm" is not a valid assay name. Valid names are: exprs, q_norm

After coercing to a Seurat object all of the metadata is still accessible.

demoSeurat <- as.Seurat(x = norm_target_demoData, normData = "q_norm")

demoSeurat # overall data object
## An object of class Seurat 
## 1821 features across 83 samples within 1 assay 
## Active assay: GeoMx (1821 features, 0 variable features)
##  2 layers present: counts, data
head(demoSeurat, 3) # most important ROI metadata
orig.ident nCount_GeoMx nFeature_GeoMx slide name scan name panel roi segment area NegGeoMean_Six-gene_test_v1_v1.1 NegGeoMean_VnV_GeoMx_Hs_CTA_v1.2 NegGeoSD_Six-gene_test_v1_v1.1 NegGeoSD_VnV_GeoMx_Hs_CTA_v1.2 q_norm_qFactors SampleID aoi cell_line roi_rep pool_rep slide_rep
DSP-1001250002642-A02.dcc GeoMx 63524.55 1821 6panel-old-slide1 (PTL-10891) cw005 (PTL-10891) Slide1 (v1.2) VnV Cancer Transcriptome Atlas, (v1.0) Six gene test custom 1 Geometric Segment 31318.73 1.487738 3.722752 1.560397 1.796952 0.9391100 DSP-1001250002642-A02 Geometric Segment-aoi-001 HS578T 1 1 1
DSP-1001250002642-A03.dcc GeoMx 62357.01 1821 6panel-old-slide1 (PTL-10891) cw005 (PTL-10891) Slide1 (v1.2) VnV Cancer Transcriptome Atlas, (v1.0) Six gene test custom 2 Geometric Segment 31318.73 2.518775 3.068217 1.820611 1.806070 0.9396774 DSP-1001250002642-A03 Geometric Segment-aoi-001 HS578T 2 1 1
DSP-1001250002642-A04.dcc GeoMx 82370.45 1821 6panel-old-slide1 (PTL-10891) cw005 (PTL-10891) Slide1 (v1.2) VnV Cancer Transcriptome Atlas, (v1.0) Six gene test custom 3 Geometric Segment 31318.73 2.847315 3.556275 1.654831 1.762066 1.5324910 DSP-1001250002642-A04 Geometric Segment-aoi-001 HEL 1 1 1
demoSeurat@misc[1:8] # experiment data
## $PKCFileName
##            VnV_GeoMx_Hs_CTA_v1.2            Six-gene_test_v1_v1.1 
## "VnV Cancer Transcriptome Atlas"           "Six gene test custom" 
## 
## $PKCModule
## VnV_GeoMx_Hs_CTA_v1.2 Six-gene_test_v1_v1.1 
##    "VnV_GeoMx_Hs_CTA"    "Six-gene_test_v1" 
## 
## $PKCFileVersion
## VnV_GeoMx_Hs_CTA_v1.2 Six-gene_test_v1_v1.1 
##                   1.2                   1.1 
## 
## $PKCFileDate
## VnV_GeoMx_Hs_CTA_v1.2 Six-gene_test_v1_v1.1 
##              "200518"              "200707" 
## 
## $AnalyteType
## VnV_GeoMx_Hs_CTA_v1.2 Six-gene_test_v1_v1.1 
##                 "RNA"                 "RNA" 
## 
## $MinArea
## VnV_GeoMx_Hs_CTA_v1.2 Six-gene_test_v1_v1.1 
##                 16000                 16000 
## 
## $MinNuclei
## VnV_GeoMx_Hs_CTA_v1.2 Six-gene_test_v1_v1.1 
##                   200                   200 
## 
## $shiftedByOne
## [1] TRUE
head(demoSeurat@misc$sequencingMetrics) # sequencing metrics
FileVersion SoftwareVersion Date Plate_ID Well SeqSetId Raw Trimmed Stitched Aligned umiQ30 rtsQ30 DeduplicatedReads NTC_ID NTC Trimmed (%) Stitched (%) Aligned (%) Saturated (%)
DSP-1001250002642-A02.dcc 0.1 1.0.0 2020-07-14 1001250002642 A02 VH00121:3:AAAG2YWM5 646250 646250 616150 610390 0.9785 0.9804 312060 DSP-1001250002642-A01.dcc 7 100 95.34236 94.45106 48.87531
DSP-1001250002642-A03.dcc 0.1 1.0.0 2020-07-14 1001250002642 A03 VH00121:3:AAAG2YWM5 629241 629241 603243 597280 0.9784 0.9811 305528 DSP-1001250002642-A01.dcc 7 100 95.86836 94.92071 48.84677
DSP-1001250002642-A04.dcc 0.1 1.0.0 2020-07-14 1001250002642 A04 VH00121:3:AAAG2YWM5 831083 831083 798188 791804 0.9785 0.9801 394981 DSP-1001250002642-A01.dcc 7 100 96.04191 95.27376 50.11632
DSP-1001250002642-A05.dcc 0.1 1.0.0 2020-07-14 1001250002642 A05 VH00121:3:AAAG2YWM5 884485 884485 849060 842133 0.9796 0.9814 424162 DSP-1001250002642-A01.dcc 7 100 95.99484 95.21168 49.63242
DSP-1001250002642-A06.dcc 0.1 1.0.0 2020-07-14 1001250002642 A06 VH00121:3:AAAG2YWM5 781936 781936 751930 744669 0.9779 0.9803 355121 DSP-1001250002642-A01.dcc 7 100 96.16260 95.23401 52.31156
DSP-1001250002642-A07.dcc 0.1 1.0.0 2020-07-14 1001250002642 A07 VH00121:3:AAAG2YWM5 703034 703034 674815 668726 0.9776 0.9797 341008 DSP-1001250002642-A01.dcc 7 100 95.98611 95.12001 49.00632
head(demoSeurat@misc$QCMetrics$QCFlags) # QC metrics
LowReads LowTrimmed LowStitched LowAligned LowSaturation LowNegatives HighNTC LowArea
DSP-1001250002642-A02.dcc FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
DSP-1001250002642-A03.dcc FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
DSP-1001250002642-A04.dcc FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
DSP-1001250002642-A05.dcc FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
DSP-1001250002642-A06.dcc FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
DSP-1001250002642-A07.dcc FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
head(demoSeurat@assays$GeoMx@meta.data) # gene metadata
TargetName Module CodeClass GeneID SystematicName Negative
ACTA2 VnV_GeoMx_Hs_CTA_v1.2 Endogenous 59 ACTA2 FALSE
FOXA2 VnV_GeoMx_Hs_CTA_v1.2 Endogenous 3170 FOXA2 FALSE
NANOG VnV_GeoMx_Hs_CTA_v1.2 Endogenous 79923, 388112 NANOG, NANOGP8 FALSE
TRAC VnV_GeoMx_Hs_CTA_v1.2 Endogenous NA TRAC FALSE
TRBC1/2 VnV_GeoMx_Hs_CTA_v1.2 Endogenous NA TRBC1 FALSE
TRDC VnV_GeoMx_Hs_CTA_v1.2 Endogenous NA TRDC FALSE

All Seurat functionality is available after coercing. Outputs might differ if the ident value is set or not.

VlnPlot(demoSeurat, features = "nCount_GeoMx", pt.size = 0.1)

demoSeurat <- as.Seurat(norm_target_demoData, normData = "q_norm", ident = "cell_line")
VlnPlot(demoSeurat, features = "nCount_GeoMx", pt.size = 0.1)

Simple GeoMx data workflow

Here is an example of a typical dimensional reduction workflow.

demoSeurat <- FindVariableFeatures(demoSeurat)
demoSeurat <- ScaleData(demoSeurat)
demoSeurat <- RunPCA(demoSeurat, assay = "GeoMx", verbose = FALSE)
demoSeurat <- FindNeighbors(demoSeurat, reduction = "pca", dims = seq_len(30))
demoSeurat <- FindClusters(demoSeurat, verbose = FALSE)
demoSeurat <- RunUMAP(demoSeurat, reduction = "pca", dims = seq_len(30))

DimPlot(demoSeurat, reduction = "umap", label = TRUE, group.by = "cell_line")