library(ggpubr)
library(gtools)
library(purrr)
library(scales)
library(pheatmap)
library(data.table)
library(gridExtra)
library(png)
library(knitr)
library(grid)
library(styler)
library(FactoMineR)
library(factoextra)
library(magick)
library(rlang)
library(GGally)
library(ggplotify)
library(remotes)
library(dplyr)
library(tidyr)
## Chunk defaults for the whole document: echo the code, hide messages and
## warnings, centre the figures, draw them with the png device and tidy the
## displayed code with styler in strict mode.
knitr::opts_chunk$set(
  echo = TRUE,
  message = FALSE,
  warning = FALSE,
  fig.align = "center",
  dev = "png",
  tidy = "styler",
  tidy.opts = list(strict = TRUE)
)


## Color Format
## Wrap `x` in colour markup that suits the current pandoc output format.
##
## x     : text to colour.
## color : colour name inserted into the LaTeX or HTML markup.
##
## Returns `x` wrapped in \textcolor{}{} when the pandoc target is 'latex',
## in a <font> tag when it is 'html', and `x` unchanged for any other target
## or when no target is set.
colFmt <- function(x, color) {
  outputFormat <- knitr::opts_knit$get("rmarkdown.pandoc.to")

  ## The option can be NULL (e.g. when called outside an rmarkdown render);
  ## comparing NULL inside `if` fails with "argument is of length zero", so
  ## fall back to the plain text instead.
  if (is.null(outputFormat)) {
    return(x)
  }

  if (outputFormat == "latex") {
    paste0("\\textcolor{", color, "}{", x, "}")
  } else if (outputFormat == "html") {
    paste0("<font color='", color, "'>", x, "</font>")
  } else {
    x
  }
}

Introduction

General information

protGear is a package for processing protein microarray data just before the main analysis. The package loads the ‘gpr’ or ‘txt’ file format extracted by the quantification software and merges this with the specific sample identifiers. The package processes multiple files extracted batch by batch with their corresponding sample identifier file. The sample identifier file has 2 variables, ‘v1’ and ‘v2’, which indicate the mini-array or block number and the sample identifier respectively. The ‘gpr’ file and the corresponding sample identifier file have the same file name. protGear also provides a web-based \(Shiny^{(R)}\) platform for real-time visualization of the data processing.

In this vignette the general workflow of protGear will be outlined by processing a sample dataset from a multicentre Plasmodium falciparum merozoite protein microarray study. The multicentre study design motivated the development of the protGear suite.

The details of the methodologies are published here https://doi.org/10.1016/j.csbj.2021.04.044

Analysis setup

Create 2 folders that hold the ‘.gpr’ files and the corresponding sample identifier files.

Folder structure of array and sample ID files

Sample identifier file

sample ID file structure

Installation

To install protGear from Bioconductor, run the following commands in R:

## install from BioConductor
## requireNamespace() only checks that BiocManager is available; unlike
## require() it does not attach the package, which is not needed here because
## install() is called through the BiocManager:: prefix.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install("protGear")

Importing data

## attach protGear, silencing any warnings raised while it loads
suppressWarnings(library("protGear"))

The first part is to specify the parameters of the micro-array experiment to assist in processing the data. The parameters specified are

The parameters “chip_path”, “channel”, “totsamples” and “sampleID_path” are mandatory.

## specify the parameters used to process the data
genepix_vars <- array_vars(
  ## mandatory
  chip_path = system.file("extdata/array_data/", package = "protGear"),
  channel = "635",
  totsamples = 21,
  sampleID_path = system.file("extdata/array_sampleID/", package = "protGear"),
  ## remaining settings
  blockspersample = 2,
  mig_prefix = "_first",
  machine = 1,
  ## optional
  date_process = "0520"
)

The exact channel used should be checked in the header of the file from the quantification software under Wavelengths.

## Read the first 40 lines of the example array file, keep lines 1 to 32 and
## strip the double quotes from them before printing.
header_gpr <- gsub(
  pattern = "\"",
  replacement = "",
  x = readLines(
    con = system.file("extdata/array_data/machine1/KK2-06.txt", package = "protGear"),
    n = 40
  )[1:32]
)
header_gpr
##  [1] "ATF\t1\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                                                                                
##  [2] "29\t31\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                                                                                
##  [3] "Type=GenePix Export 3\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                                                               
##  [4] "DateTime=2017/09/05 17:17:47\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                                                        
##  [5] "Settings=\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                                                                           
##  [6] "GalFile=C:\\Users\\rkimathi\\Desktop\\GAL FILE DATA.gal\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                             
##  [7] "PixelSize=10\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                                                                        
##  [8] "Wavelengths=635\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                                                                     
##  [9] "ImageFiles=Z:\\Multicenter study\\4_9_2017\\data\\BRB0001.tif 0\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                     
## [10] "NormalizationMethod=None\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                                                            
## [11] "NormalizationFactors=1\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                                                              
## [12] "JpegImage=\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                                                                          
## [13] "StdDev=Type 1\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                                                                       
## [14] "FeatureType=Circular\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                                                                
## [15] "Barcode=\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                                                                            
## [16] "BackgroundSubtraction=LocalFeature\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                                                  
## [17] "ImageOrigin=0, 0\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                                                                    
## [18] "JpegOrigin=1670, 840\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                                                                
## [19] "Creator=GenePix Pro 7.3.0.0\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                                                         
## [20] "Scanner=GenePix 4300 [141110]\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                                                       
## [21] "FocusPosition=0\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                                                                     
## [22] "Temperature=0\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                                                                       
## [23] "LinesAveraged=1\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                                                                     
## [24] "Comment=\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                                                                            
## [25] "PMTGain=400\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                                                                         
## [26] "ScanPower=100\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                                                                       
## [27] "LaserPower=0.55\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                                                                     
## [28] "Filters=Standard Red\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                                                                
## [29] "ScanRegion=0,0,2200,7200\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                                                            
## [30] "ScanArea=1\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                                                                          
## [31] "Supplier=\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"                                                                                           
## [32] "Flags\tNormalize\tAutoflag\tBlock\tColumn\tRow\tName\tID\tX\tY\tF635 Median\tB635\tB635 Median\tSNR 635\tF635 Median - B635\tIndex"

The function check_sampleID_files() helps to check whether each micro array file has a corresponding sample identifier file. The sample identifier files are generated from the lab plate maps to match the corresponding samples on a specific slide. If the sample identifier file is missing, protGear automatically generates the IDs.

Spatial structure of slide

protGear offers functionality to inspect the slide visually for any strong spatial biases when the scan image is not available. However, we recommend using the scanned image to visualize the spatial artefacts that might not be recorded in the .gpr file. We include the functions visualize_slide() and visualize_slide_2d() to check the spatial structure. The functions are built on structure_plot(), which shows the block and mini-array structure of a slide.

Visualize the foreground MFI

Here we visualize the foreground MFI using the visualize_slide() function.

## Plot the "F635 Median" values of the example array file KK2-06
visualize_slide(
  MFI_var = "F635 Median",
  infile = system.file(
    "extdata/array_data/machine1/KK2-06.txt",
    package = "protGear"
  )
)