Chapter 4 MS2 preprocessing untargeted

4.1 XCMS

The script 2a.Pipeline_XCMS.R is available in the folder J:\CBMR\SUN-CBMR-Metabolomics\Workflow\Script\modules\MS2_preprocessing

4.1.1 Source paths on the server

# Run the shared set-up script from the server.
# NOTE(review): presumably this defines path_utils_prepro, which is used later
# in this section but never assigned in it -- confirm.
source("H:/From_SUND/Scripts/utils/utils_MS2_set_up/set_up_paths.R")

4.1.2 Provide polarity 🕵

# Polarity label of the data being processed (this example uses "POS").
polarity <- "POS"

4.1.3 Path to the .mzML files (!!! they must already have been converted by OpenMS) 🕵

# Folder whose files are read with readMSData() and counted to build
# sampleGroups further down in this section.
path_files_mzML <- "H:/From_SUND/Scripts/test/mzML_openms_MS2_POS"

4.1.4 Path for storing outputs 🕵

# Folder for the outputs of this step.
# NOTE(review): not referenced in the visible code; presumably read by the
# sourced XCMS.R -- confirm.
path_outputs <- "H:/From_SUND/Scripts/outputs"

4.1.5 Parameters for preprocessing 🕵

4.1.5.1 Filter out compounds that elute too early or too late 🕵

# Retention-time window to keep: lower and upper bound, each scaled by 60
# (presumably minutes converted to seconds -- confirm against XCMS.R).
rt_range <- 60 * c(0.5, 15)

4.1.5.2 Peak picking 🕵

    # xcms provides the *Param constructors used throughout this section.
    library(xcms)

    # All preprocessing settings are collected in one list; its first entry
    # holds the centWave peak-picking parameters.
    params <- list(
      CentWave = CentWaveParam(
        ppm = 20,                        # m/z tolerance, in ppm
        peakwidth = 60 * c(0.05, 0.2),   # expected min/max peak width (values scaled by 60)
        snthresh = 5,                    # signal-to-noise cutoff
        noise = 1500,                    # minimum intensity considered
        prefilter = c(3, 1500),          # c(k, I): k points at intensity >= I
        integrate = 2,                   # integration method (see centWave docs)
        mzdiff = -0.001,                 # minimum m/z difference; negative allows overlap
        verboseColumns = TRUE,           # keep the additional peak columns
        fitgauss = TRUE,                 # fit a Gaussian to each peak
        extendLengthMSW = TRUE           # see centWave docs for this option
      )
    )

4.1.5.3 Grouping for alignment 🕵

    # First peak-density grouping pass, run before retention-time alignment.
    # Every file is put into the same sample group (label 1), one entry per
    # file found in path_files_mzML.
    # NOTE(review): list.files() counts every file in that folder, not only
    # .mzML files -- confirm the folder contains nothing else.
    params[["group1"]] <- PeakDensityParam(
      sampleGroups = rep(1, length(list.files(path_files_mzML))),
      binSize = 0.01,       # width of the m/z slices
      bw = 2,               # bandwidth of the density smoothing along rt
      minSamples = 1,       # minimum number of samples with a peak
      minFraction = 0.3,    # minimum fraction of samples with a peak
      maxFeatures = 20      # maximum number of features per m/z slice
    )

4.1.5.4 Alignment 🕵

    # Retention-time alignment settings (peak-groups method).
    params[["PeakAlign"]] <- PeakGroupsParam(
      smooth = "loess",      # interpolation used for the correction
      span = 0.6,            # degree of loess smoothing
      minFraction = 0.8,     # fraction of samples a peak group must cover
      family = "gaussian",   # loess fitting family
      extraPeaks = 3         # extra peaks tolerated within a peak group
      # subset =             # optional; intentionally left unset
    )

4.1.5.5 Grouping again with stricter conditions after alignment 🕵

    # Second peak-density grouping pass, applied after alignment with a
    # narrower bandwidth (1.8) and a higher minFraction (0.6) than the first.
    # Every file is put into the same sample group (label 1).
    # NOTE(review): list.files() counts every file in path_files_mzML, not
    # only .mzML files -- confirm the folder contains nothing else.
    params[["group2"]] <- PeakDensityParam(
      sampleGroups = rep(1, length(list.files(path_files_mzML))),
      binSize = 0.01,       # width of the m/z slices
      bw = 1.8,             # bandwidth of the density smoothing along rt
      minSamples = 1,       # minimum number of samples with a peak
      minFraction = 0.6,    # minimum fraction of samples with a peak
      maxFeatures = 20      # maximum number of features per m/z slice
    )

4.1.5.6 Gap filling 🕵

    # Gap-filling settings: no expansion of the m/z or rt window, and a
    # ppm value of 30.
    params[["FillChromPeaksParam"]] <- FillChromPeaksParam(
      expandMz = 0,
      expandRt = 0,
      ppm = 30
    )

4.1.5.7 Conducting XCMS

    # Read every file found in path_files_mzML in "onDisk" mode.
    raw <- readMSData(list.files(path_files_mzML, full.names = TRUE), 
                      mode = "onDisk")
   
   
   
    # Run the XCMS script from the utilities folder.
    # NOTE(review): path_utils_prepro is not assigned in this section; it
    # presumably comes from set_up_paths.R, and XCMS.R presumably reads the
    # globals defined above (raw, params, rt_range, ...) -- confirm.
    source(paste0(path_utils_prepro, "/XCMS.R"))

4.2 Extraction of MS2-spectra

The script 3a.Pipeline_extract_MS2.R is available in the folder J:\CBMR\SUN-CBMR-Metabolomics\Workflow\Script\modules\MS2_preprocessing

4.2.1 Source paths on the server

# Run the shared set-up script from the server.
# NOTE(review): presumably this defines path_utils_prepro, which is used later
# in this section but never assigned in it -- confirm.
source("H:/From_SUND/Scripts/utils/utils_MS2_set_up/set_up_paths.R")

4.2.2 Provide the polarity 🕵

# Polarity label of the data being processed.
polarity <- "POS" # or "NEG"

4.2.3 Path of the XCMSnExp object saved after conducting XCMS 🕵

# Location of the .rds file with the XCMSnExp object from the XCMS step.
# NOTE(review): presumably read by the sourced extract_MS2.R -- confirm.
path_rds_xcms_XCMSnExp <- "H:/From_SUND/Scripts/outputs/xcms_XCMSnExp_POS.rds" 

4.2.4 Path for storing outputs 🕵

# Folder for the outputs of this step.
# NOTE(review): not referenced in the visible code; presumably read by the
# sourced extract_MS2.R -- confirm.
path_outputs <- "H:/From_SUND/Scripts/outputs"

4.2.5 Parameters for extracting MS2 spectra 🕵

4.2.5.1 Extraction: rt and m/z thresholds for extraction, and the type of object to return 🕵

# Extraction settings: start a fresh parameter list for this script.
params <- list(
  expandRt = 15,            # rt threshold used for the extraction
  expandMz = 0.01,          # m/z threshold used for the extraction
  return.type = "Spectra"   # alternatives: "MSpectra", "list", "List"
)

4.2.6 Combining: m/z threshold, proportion among all samples, type of combining, and functions for combining m/z and intensity 🕵

# Settings for building a consensus spectrum for each feature at each
# collision energy; they are added to the existing params list.
params[["tolerance"]] <- 0.005     # m/z threshold used when combining
params[["peaks"]] <- "intersect"   # combining type; "union" reports all peaks from all input spectra
params[["minProp"]] <- 0.7         # proportion among all samples
params[["intfun"]] <- median       # function used to combine intensities
params[["mzfun"]] <- median        # function used to combine m/z values

4.2.8 (Optional but recommended) Reduce the number of the MS2 spectra ✈️

If you do not want this reduction, do not run the following two lines.
# Switch for the optional reduction step.
# NOTE(review): "TRUE" is a character string, not the logical TRUE. It is left
# unchanged because the consuming script is not visible here -- confirm which
# form extract_MS2.R expects.
npeaks_reduce <- "TRUE"
# Presumably the number of peaks kept when the reduction is enabled -- confirm.
npeaks_remaining <- 60

4.2.9 Conduct the extraction of MS2 spectra

# Run the MS2-extraction script from the utilities folder.
# NOTE(review): path_utils_prepro is not assigned in this section; it
# presumably comes from set_up_paths.R, and extract_MS2.R presumably reads the
# globals defined above -- confirm.
source(paste0(path_utils_prepro, "/extract_MS2.R"))

4.3 (Optional) CAMERA

If you would like more information about adducts, run these lines; note that at least 6 samples are required.

The script 4.(optional)Pipeline_CAMERA.R is available in the folder J:\CBMR\SUN-CBMR-Metabolomics\Workflow\Script\modules\MS2_preprocessing

4.3.1 Source paths on the server

# Run the shared set-up script from the server.
# NOTE(review): presumably this defines path_utils_prepro, which is used later
# in this section but never assigned in it -- confirm.
source("H:/From_SUND/Scripts/utils/utils_MS2_set_up/set_up_paths.R")

4.3.2 Provide the polarity 🕵

# Polarity label of the data being processed (this example uses "POS").
polarity <- "POS"

4.3.3 Path for storing outputs 🕵

# Folder for the outputs of this step.
# NOTE(review): not referenced in the visible code; presumably read by the
# sourced CAMERA.R -- confirm.
path_outputs <- "H:/From_SUND/Scripts/outputs"

4.3.4 Read the XCMSnExp object saved after conducting XCMS 🕵

# Location of the .rds file with the XCMSnExp object from the XCMS step.
# NOTE(review): presumably read by the sourced CAMERA.R -- confirm.
path_rds_xcms_XCMSnExp <- "H:/From_SUND/Scripts/outputs/xcms_XCMSnExp_POS.rds" 

4.3.5 Parameters for CAMERA 🕵

4.3.5.1 Grouping 🕵

# Grouping settings: start a fresh parameter list for this script.
params <- list(
  perfwhm = 0.6,           # share of the peak's FWHM (full width at half maximum)
  group_intval = "into",   # which intensity value to use
  sigma = 6                # factor applied to the standard deviation
)

4.3.5.2 Correlation 🕵

# Correlation settings, added to the existing params list.
params[["calcIso"]] <- FALSE        # use isotope-detection information in the graph clustering
params[["calcCiS"]] <- FALSE        # correlate within samples
# NOTE(review): the key is spelled "calsCaS", whereas CAMERA's own argument is
# "calcCaS". It is kept as-is because the consuming script is not visible
# here -- confirm which spelling CAMERA.R reads.
params[["calsCaS"]] <- TRUE         # correlate across samples
params[["cor_eic_th"]] <- 0.7       # threshold for the RIC correlation
params[["cor_exp_th"]] <- 0.3       # threshold for intensity correlation across samples
params[["pval"]] <- 1e-6            # p-value cutoff for the correlation significance test
params[["graphMethod"]] <- "lpc"    # clustering method for the correlation graph (hcs, lpc)
params[["intval_cor"]] <- "into"    # which intensity value to use

4.3.5.3 Find isotopes 🕵

# Isotope-search settings, added to the existing params list.
params[["find_isoto_ppm"]] <- 10           # m/z threshold
# NOTE(review): the "mzabs" key is assigned again (same value) by the adduct
# settings of this script, so both steps share this single entry.
params[["mzabs"]] <- 0.01                  # allowed variance for the search
params[["find_isoto_intval"]] <- "into"    # intensity value used for the C12/C13 check

4.3.5.4 Find adducts 🕵

# Adduct-search settings, added to the existing params list.
params[["find_addu_ppm"]] <- 10
# NOTE(review): "mzabs" is also assigned (same value) by the isotope settings
# of this script; this assignment overwrites that entry, so the two steps
# cannot currently be given different values.
params[["mzabs"]] <- 0.01
params[["multiplier"]] <- 4        # highest number (n) of allowed cluster ions [nM+ion]

4.3.6 Conduct CAMERA

# Run the CAMERA script from the utilities folder.
# NOTE(review): path_utils_prepro is not assigned in this section; it
# presumably comes from set_up_paths.R, and CAMERA.R presumably reads the
# globals defined above -- confirm.
source(paste0(path_utils_prepro, "/CAMERA.R"))