Anexo Líneas de comandos

(1)

Anexo

Líneas de comandos

(2)

Anexo: Líneas de comandos

LC1. Líneas de comandos de R para validar las identificaciones con pRatio.

### params

# Precursor mass tolerance, in ppm, passed to filterDeltaMass().
deltaMassThreshold <- 15 # in ppm

# Number of mass "jumps" filterDeltaMass() may try: 1, 3 or 5.
deltaMassAreas <- 5

# SQLite file opened with dbConnect() below (placeholder path).
input <- "C:Ubicación/Resultado/Búsqueda..msf"

# Tab-separated results table written at the end of the script
# (placeholder path; the stray leading blank and typographic closing quote
# of the printed listing are removed so the literal parses).
output <- "C:Ubicación/Carpeta/Resultados./Búsqueda_results.txt"

# One library() call per line: the printed listing ran them together.
library(stringi)
library(readr)
library(RSQLite)
library(plyr)
library(Peptides)

db <- dbConnect(SQLite(), dbname = input)

# Rank-1 peptide matches with an 'Xcorr' score above 1.5, together with the
# input file name, scan range, charge, mass, retention time and delta score.
# The page number "(3)" and the typographic minus in "-1" that the printed
# listing carried inside the SQL text are removed; both break the statement.
queryMain <- "select p.peptideid, fi.filename, sh.firstscan, sh.lastscan, sh.charge, p.sequence, sh.mass, ps.scorevalue, sh.retentiontime, p.searchenginerank, p.deltascore
from peptides p, peptideScores ps, spectrumHeaders sh, massPeaks mp, workFlowInputFiles fi, processingNodeScores scoreNames
where p.peptideid = ps.peptideid and sh.spectrumid = p.spectrumid
and (fi.fileid = mp.fileid or mp.fileid = -1) and mp.masspeakid = sh.masspeakid and scoreNames.scoreid = ps.scoreid and scoreNames.ScoreName = 'Xcorr' and p.searchenginerank = 1
and ps.scorevalue > 1.5
order by fi.filename desc, sh.firstscan asc, sh.lastscan asc, sh.charge asc, ps.scorevalue desc"

data <- dbGetQuery(conn = db, queryMain)

# Amino-acid modifications (id, position, name, delta mass) of every rank-1
# peptide whose score value exceeds 1.5, ordered by peptide and position.
# The blank lines below are inside the string literal and are sent to SQLite
# as plain whitespace.
queryModifications = "select p.peptideid,

paam.aminoacidmodificationid, paam.position,

p.sequence,

aam.modificationname, aam.deltamass from peptides p, peptideScores ps, spectrumHeaders sh,

peptidesaminoacidmodifications paam, aminoacidmodifications aam

where p.peptideid = paam.peptideid and sh.spectrumid = p.spectrumid and p.peptideid = ps.peptideid

and aam.aminoacidmodificationid = paam.aminoacidmodificationid and p.searchenginerank = 1

and ps.scorevalue > 1.5

order by p.peptideid ASC, paam.position ASC"

# One row per (peptide, modification); consumed by ptmAnnotation via ddply.
dataMod=dbGetQuery(conn = db, queryModifications)

# Protein id and description for every rank-1 peptide with score value above
# 1.5, ordered by peptide id. The page number "(4)" that the printed listing
# carried between "select" and the column list is removed.
queryProteinInfo <- "select pq.peptideid, p.sequence, pq.proteinid, q.description
from peptidesProteins pq, spectrumHeaders sh, peptides p, peptideScores ps, proteinAnnotations q
where pq.peptideid = p.peptideid and p.peptideid = ps.peptideid and pq.proteinid = q.proteinid and sh.spectrumid = p.spectrumid and p.searchenginerank = 1 and ps.scorevalue > 1.5
order by pq.peptideid asc"

dataProt <- dbGetQuery(conn = db, queryProteinInfo)

# This is the last query of the script: release the SQLite handle instead of
# leaving it open until the R session ends.
dbDisconnect(db)

## prepare DATA: mods & prots

# Build the annotated sequence of one peptide from its modification rows.
#
# x: data frame holding the rows of a single peptide, with columns
#    Sequence, Position and DeltaMass. Position is used as a 0-based residue
#    index (residue i+1 of the string is the modified one).
# Returns a character vector of length 2: the sequence with "[DeltaMass]"
# inserted after every modified residue, and the sum of the delta masses
# (coerced to character by c()).
#
# The printed listing had the function header and `b <- i+2` fused into
# comment lines; both are restored here.
ptmAnnotation <- function(x) {
  pep <- x[1, ]$Sequence
  b <- 0 # start of the not-yet-copied part of `pep` (substr clamps 0 to 1)
  p <- ""
  modNum <- 1
  modMass <- 0
  for (i in x$Position) {
    # copy residues up to and including the modified one, then its mass tag
    p <- stri_flatten(c(p, substr(pep, b, i + 1), "[", x[modNum, ]$DeltaMass, "]"), collapse = "")
    modMass <- modMass + x[modNum, ]$DeltaMass
    b <- i + 2
    modNum <- modNum + 1
  }
  # append the unmodified tail
  p <- stri_flatten(c(p, substr(pep, b, nchar(pep))), collapse = "")
  return(c(p, modMass))
}

(5)

# Annotated sequence and total modification mass per modified peptide.
# (The printed listing ran these statements together on one line.)
dataModAnnotation <- ddply(dataMod, .(PeptideID), ptmAnnotation)
colnames(dataModAnnotation) <- c("PeptideID", "Sequence", "modMass")
dataModAnnotation$modMass <- as.numeric(dataModAnnotation$modMass)

# mix unmodified and modified
dataModTmp <- merge(unique(data[, c("PeptideID", "Sequence")]), dataModAnnotation,
                    by = "PeptideID", all.x = TRUE)
# Peptides without modification rows keep their plain sequence.
unmodified <- is.na(dataModTmp$Sequence.y)
dataModTmp$Sequence.y[unmodified] <- dataModTmp$Sequence.x[unmodified]
dataModAll <- dataModTmp[, c("PeptideID", "Sequence.y", "modMass")]
colnames(dataModAll) <- c("PeptideID", "SequenceMod", "modMass")

# All protein descriptions of a peptide, joined with " -- ".
redundances <- aggregate(Description ~ PeptideID, data = dataProt, paste, collapse = " -- ")
colnames(redundances) <- c("PeptideID", "Redundances")

# First protein description per peptide plus the joined list.
dataProt.u <- dataProt[!duplicated(dataProt["PeptideID"]), ]
peptideProt <- merge(dataProt.u[, c("PeptideID", "Description")], redundances,
                     by = "PeptideID", all.x = TRUE)

#*****

# Attach the annotated sequence, modification mass and protein columns to
# every PSM. The tables are joined on PeptideID rather than glued side by
# side: `data` is ordered by file name and scan, while dataModAll and
# peptideProt come out of merge() ordered by PeptideID, so a positional
# cbind() only lines up when both orders happen to coincide.
dataAll <- merge(data, dataModAll, by = "PeptideID", all.x = TRUE)
dataAll <- merge(dataAll, peptideProt, by = "PeptideID", all.x = TRUE)

colnames(dataAll) <- c("PeptideID", "FileName", "FirstScan", "LastScan", "Charge",
                       "Sequence", "Mass", "ScoreValue", "RetentionTime",
                       "SearchEngineRank", "DeltaScore", "SequenceMod", "modMass",
                       "Description", "Redundances")

## Calculate theoretical mass

# Unmodified peptides have no modification mass. Indexing the column (rather
# than `dataAll[rows, ]$modMass <- 0`) also works when no row is NA.
dataAll$modMass[is.na(dataAll$modMass)] <- 0

# Monoisotopic weight of the bare sequence (Peptides::mw) plus 1.00727647.
dataAll$Theoretical <- vapply(dataAll[, c("Sequence")], mw, numeric(1),
                              monoisotopic = TRUE, USE.NAMES = FALSE)
dataAll$Theoretical <- dataAll$Theoretical + 1.00727647

# NOTE(review): 229.162932 is added to every peptide; it is the same value
# that is later rewritten to "@" in the sequence, presumably the mass of the
# isobaric tag -- confirm against the search parameters.
dataAll$TheoreticalModTag <- dataAll$Theoretical + dataAll$modMass + 229.162932

# Absolute mass error relative to the observed mass, in ppm.
dataAll$deltaMassTargetppm <- abs(dataAll$Mass - dataAll$Theoretical - dataAll$modMass - 229.162932) /
  dataAll$Mass * 1e6

## Decoy tagging

# Substring that marks a decoy entry in the protein description. In the
# printed listing this assignment was fused into the comment line above.
decoy_tag <- "_INV_"

protein <- dataAll[, "Description"]
index <- grep(decoy_tag, protein, fixed = TRUE)

# 0/1 indicator columns: every row is a target unless its description
# contains decoy_tag.
isDecoy <- rep(0, dim(dataAll)[1])
isTarget <- rep(1, dim(dataAll)[1])
isDecoy[index] <- 1
isTarget[index] <- 0

dataAll <- cbind(dataAll, isDecoy, isTarget)

## filter by deltaMass

# Keep or reject one PSM score according to its precursor mass error.
#
# x: vector c(TheoreticalModTag, Mass, ScoreValue) (one row of dataAll).
# deltaMassThreshold: tolerance in ppm.
# deltaMassAreas: how many mass "jumps" may be tried: 1 tests the observed
#   mass only, 3 also tests Mass - 1.0033, 5 also tests Mass - 2 * 1.0033.
# Returns ScoreValue when any allowed jump is within tolerance, else 0.01.
#
# Fix: the third test used `Mass - 1.0033` again, which repeats the second
# test and can never change the outcome; it now subtracts two offsets.
filterDeltaMass <- function(x, deltaMassThreshold, deltaMassAreas) {
  TheoreticalModTag <- x[1]
  Mass <- x[2]
  ScoreValue <- x[3]

  rejected <- 0.01
  isotopeShift <- 1.0033
  withinTolerance <- function(observed) {
    abs(TheoreticalModTag - observed) / TheoreticalModTag * 1e6 < deltaMassThreshold
  }

  # jump 1: observed mass as reported
  if (withinTolerance(Mass)) {
    return(ScoreValue)
  }
  if (deltaMassAreas <= 1) {
    return(rejected)
  }

  # jumps 2-3: observed mass one offset too heavy
  if (withinTolerance(Mass - isotopeShift)) {
    return(ScoreValue)
  }
  if (deltaMassAreas <= 3) {
    return(rejected)
  }

  # jumps 4-5: observed mass two offsets too heavy
  if (withinTolerance(Mass - 2 * isotopeShift)) {
    return(ScoreValue)
  }
  rejected
}

# Score of every PSM after the delta-mass filter (0.01 when rejected).
# The result is kept in its own data frame; the assignment back into
# dataAll$ScoreValue below is commented out.
jump1ScoreValue <- as.data.frame(unlist(apply(
  dataAll[, c("TheoreticalModTag", "Mass", "ScoreValue")], 1, filterDeltaMass,
  deltaMassThreshold = deltaMassThreshold, deltaMassAreas = deltaMassAreas
)))
colnames(jump1ScoreValue) <- "ScoreValueAfterJUMP"

#dataAll$ScoreValue<-jump1ScoreValue$ScoreValueAfterJUMP

## Add xcorr_c n = dim(dataAll)[1]

# Corrected score for one PSM.
#
# x: vector c(Charge, ScoreValue, Sequence); elements may be character, as
#    they are when called through apply() on a data frame.
# Returns log(ScoreValue / r) / log(2 * nchar(Sequence)), with r = 1.22 for
# charges above 2 and r = 1 otherwise.
xcorr_c <- function(x) {
  r <- 1
  if (as.numeric(x[1]) > 2) {
    r <- 1.22
  }
  log(as.numeric(x[2]) / r) / log(2 * nchar(as.character(x[3])))
}

# Append the corrected score of every PSM as column "xcorr_c".
dataAll <- cbind(dataAll, apply(dataAll[, c("Charge", "ScoreValue", "Sequence")], 1, xcorr_c))
colnames(dataAll)[ncol(dataAll)] <- "xcorr_c"

# sort by xcorr_c

#dataAll <‐ dataAll[order(decreasing = TRUE,dataAll$xcorr_c),]

##dataAll <‐ dataAll[order(decreasing = TRUE,dataAll$ScoreValue),]

#tmp <‐ cbind(dataAll[, "xcorr_c"], dataAll[, "isDecoy"])

##tmp <‐ cbind(dataAll[, "ScoreValue"], dataAll[, "isDecoy"])

#FP <‐ cumsum(tmp[, 2])

#tmp <‐ cbind(tmp, FP)

#xcorr_cP <‐ unlist(lapply(1:n, function(x) (tmp[x, 'FP'])/n))

#dataAll <‐ cbind(dataAll, xcorr_cP)

### FDR ScoreValue

# Rank PSMs by raw score, best first.
dataAll <- dataAll[order(decreasing = TRUE, dataAll$ScoreValue), ]

# Running counts of decoys (FP) and targets (TP) down the ranked list.
tmp <- cbind(dataAll[, "ScoreValue"], dataAll[, "isDecoy"], dataAll[, "isTarget"])
FP <- cumsum(tmp[, 2])
TP <- cumsum(tmp[, 3])
tmp <- cbind(tmp, FP, TP)

# Decoy/target ratio at every rank; computed on the whole vectors instead of
# row by row, which also copes with an empty table.
xcorr_FDR <- FP / TP
dataAll <- cbind(dataAll, tmp, xcorr_FDR)

# Running maximum of the ratio (never decreases with rank).
xcorr_FDRa <- cummax(xcorr_FDR)
dataAll <- cbind(dataAll, xcorr_FDRa)

### FDR CALC

# Same computation as above, ranking by the corrected score instead.
dataAll <- dataAll[order(decreasing = TRUE, dataAll$xcorr_c), ]

tmp <- cbind(dataAll[, "xcorr_c"], dataAll[, "isDecoy"], dataAll[, "isTarget"])
FP <- cumsum(tmp[, 2])
TP <- cumsum(tmp[, 3])
tmp <- cbind(tmp, FP, TP)

xcorr_c_FDR <- FP / TP
dataAll <- cbind(dataAll, tmp, xcorr_c_FDR)

xcorr_c_FDRa <- cummax(xcorr_c_FDR)
dataAll <- cbind(dataAll, xcorr_c_FDRa)

# Accepted identifications: target PSMs whose decoy/target ratio is below 1 %.
# NOTE(review): the filter uses the raw ratio, not the running maximum
# xcorr_c_FDRa computed just above -- confirm this is intended.
res <- dataAll[dataAll$xcorr_c_FDR < 0.01 & dataAll$isTarget == 1, ]

#res <- dataAll[dataAll$xcorr_c_FDR < 0.01,]

# Last component of the first row's file name, split on backslashes.
fileNameParts <- strsplit(data[1, "FileName"], fixed = TRUE, split = "\\")[[1]]
fileName <- fileNameParts[length(fileNameParts)]

# Columns of the pRatio layout that this script does not compute.
pRatio <- "NA"
pI <- "NA"
Xcorr1Original <- "NA"
Xcorr2Search <- "NA"
Sp <- "NA"
SpRank <- "NA"
ProteinsWithPeptide <- "NA"

# Full pRatio-style table. Column names that the printed listing had split
# across a line wrap ("Descripti on", "Sp Rank", "Se quence") are rejoined.
resPratio <- cbind(
  fileName, fileName, res[, c("FirstScan", "LastScan", "Charge")], pRatio,
  res[, c("xcorr_c_FDR", "Description", "SequenceMod")], pI,
  res[, c("Mass", "xcorr_c")], Xcorr1Original, Xcorr2Search, res[, "DeltaScore"],
  Sp, SpRank, ProteinsWithPeptide, res[, "Redundances"]
)
colnames(resPratio) <- c(
  "FileName", "RAWFile", "FirstScan", "LastScan", "Charge", "pRatio", "FDR",
  "FASTAProteinDescription", "Sequence", "pI", "PrecursorMass", "Xcorr1Search",
  "Xcorr1Original", "Xcorr2Search", "DeltaCn", "Sp", "SpRank",
  "ProteinsWithPeptide", "Redundances"
)

#SIMPLYFIED
resPratio <- resPratio[, c("FileName", "RAWFile", "FirstScan", "LastScan", "Charge",
                           "Sequence", "FASTAProteinDescription", "Xcorr1Search",
                           "FDR", "Redundances")]

# pRatio modification parsing: replace each bracketed delta mass by the
# one-character symbol used downstream. Patterns are applied in this order.
modSymbols <- c(
  '\\[57.021464\\]' = '*',
  '\\[125.047679\\]' = '_',
  '\\[15.994915\\]' = '#',
  '\\[229.162932\\]' = '@',
  '\\[113.08407\\]' = '^',
  '\\[304.20536\\]' = '{'
)
for (modPattern in names(modSymbols)) {
  resPratio$Sequence <- gsub(modPattern, modSymbols[[modPattern]], resPratio$Sequence)
}

write.table(resPratio, file = output, col.names = TRUE, row.names = FALSE, sep = "\t", quote = FALSE)

#write.table(res,file = output,col.names = TRUE, row.names = FALSE,sep="\t", quote = FALSE)

(10)

LC2. Archivo Config.txt previo a la ejecución del Pre-SanXoT.

##############################################################################
# Params to Pre-SanXoT
# This file is read with source(), so every setting must sit on its own
# line: anything following "#" on a line is ignored by R.
##############################################################################

# Write the name of the Experiments Name to be analyzed
Expto <- c("iTRAQ_1", "iTRAQ_2")

# Pattern of folders that contains the MSFs
Patern <- c("FR_*")

# Channels used in the Experiments
ChannelID <- c(1:8)

# Type of label used
Typeoflabel <- c("iTRAQ")

# Tags Used in the Experiment (All is "ALL")
TagsUsed <- c("113", "114", "115", "116", "117", "118", "119", "121")

# Control Tag
ControlTag <- c("121")

# Mean Tag Calculation
MeanCalculation <- c("FALSE")

# Mean Tags
MeanTags <- c("126", "131")

# First Tag
FirstTag <- c("113")

# Search Engine
SearchEngine <- c("2")

# Daemon used (TRUE or FALSE)
Daemon <- c("TRUE")

# Number of comparatives within the Experiment
Comparatives <- c("8")

# To Absolute Quantification (TRUE = Absolute Quantification,
# FALSE = Relative Quantification or BOTH = Both)
Absolute <- c("BOTH")

# Calculate all against all tags
Random <- c("YES")

##############################################################################
# Params to Tag File Maker
##############################################################################

# When you have only ONE integration Samples to Integrate (Expto_Tag)
Integration <- c("SPIROS_128_N", "SPIROS_128_C", "SPIROS_129_N")

# Number of Integrations
NOI <- c("3")

# Integration Names and Tags Used
Control <- c("126", "127_N", "127_C")
CR2 <- c("128_N", "128_C", "129_N")
CR7 <- c("129_C", "130_N", "130_C")
Integrations <- c("Control", "CR2", "CR7")

##############################################################################

(12)

LC3. Líneas de comandos de R para ejecutar el Pre‐SanXoT.

# Use the current working directory as the project root and load the
# settings from Config.txt found there. (The printed listing had setwd() and
# source() fused on one line, which does not parse.)
(WD <- getwd())

if (!is.null(WD)) setwd(WD)
source(paste0(WD, "/Config.txt"))

##############################################################################

# Pre‐SanXoT

##############################################################################

# List the immediate sub-directories of `path`.
#
# path, pattern, ignore.case: forwarded to list.files().
# all.dirs: forwarded as list.files(all.files = ).
# full.names: TRUE returns the paths as produced by list.files(); anything
#   else returns only the directory names.
# Note: this definition masks base::list.dirs for the rest of the script.
list.dirs <- function(path = ".", pattern = NULL, all.dirs = FALSE,
                      full.names = FALSE, ignore.case = FALSE) {
  # full paths are needed here so that file.info() can find the entries
  all <- list.files(path, pattern,
                    all.files = all.dirs, full.names = TRUE,
                    recursive = FALSE, ignore.case = ignore.case)
  dirs <- all[file.info(all)$isdir]

  # determine whether to return full names or just dir names
  if (isTRUE(full.names)) {
    return(dirs)
  }
  basename(dirs)
}

# Sub-folders of <WD>/<experiment>/MSF whose name matches Patern.
MSFfolders <- list.dirs(path = paste0(WD, "/", Expto, "/MSF"), pattern = Patern)

library("RSQLite")

# Reporter-ion rows (scan, mass, height, retention time, channel, mass peak)
# labelled with the workflow name. Two variants of the same query: they
# differ only in the tables the workflow name is taken from. The page number
# "(13)" that the printed listing carried inside the first query is removed.
quanQueryWorkflows <- "SELECT [SpectrumHeaders].[FirstScan], [ReporterIonQuanResults].[Mass] AS [Mass2], [ReporterIonQuanResults].[Height] AS [Height1], [SpectrumHeaders].[RetentionTime],
[ReporterIonQuanResults].[QuanChannelID], [MassPeaks].[MassPeakID],
[Workflows].[WorkflowName] AS [FileName]
FROM [ReporterIonQuanResults]
INNER JOIN [SpectrumHeaders] ON [ReporterIonQuanResults].[SpectrumID] = [SpectrumHeaders].[SpectrumID]
INNER JOIN [MassPeaks] ON [MassPeaks].[MassPeakID] = [SpectrumHeaders].[MassPeakID]
INNER JOIN [WorkflowInputFiles] ON [MassPeaks].[FileID] = [WorkflowInputFiles].[FileID]
INNER JOIN [Workflows] ON [WorkflowInputFiles].[WorkflowID] = [Workflows].[WorkflowID]
WHERE [ReporterIonQuanResults].[Mass] > 0"

quanQueryWorkflowInfo <- "SELECT [SpectrumHeaders].[FirstScan], [ReporterIonQuanResults].[Mass] AS [Mass2], [ReporterIonQuanResults].[Height] AS [Height1], [SpectrumHeaders].[RetentionTime],
[ReporterIonQuanResults].[QuanChannelID], [MassPeaks].[MassPeakID],
[WorkflowInfo].[WorkflowName] AS [FileName]
FROM [ReporterIonQuanResults]
INNER JOIN [SpectrumHeaders] ON [ReporterIonQuanResults].[SpectrumID] = [SpectrumHeaders].[SpectrumID]
INNER JOIN [MassPeaks] ON [MassPeaks].[MassPeakID] = [SpectrumHeaders].[MassPeakID]
INNER JOIN [FileInfos] ON [MassPeaks].[FileID] = [FileInfos].[FileID], [WorkflowInfo]
WHERE [ReporterIonQuanResults].[Mass] > 0"

# Export the reporter-ion table of every .msf file to
# <WD>/<experiment>/Pre-SanXoT/<msf name>.csv
for (j in Expto) {
  for (k in MSFfolders) {
    files <- list.files(path = paste(WD, "/", j, "/MSF/", k, sep = ""), pattern = "*.msf")
    for (i in files) {
      db <- dbConnect(SQLite(), dbname = paste(WD, "/", j, "/MSF/", k, "/", i, sep = ""))
      if (SearchEngine == "2") {
        data <- dbGetQuery(conn = db, quanQueryWorkflows)
      } else {
        data <- dbGetQuery(conn = db, quanQueryWorkflowInfo)
      }
      # one connection per file: close it before moving to the next one
      dbDisconnect(db)

      i <- substr(i, 1, nchar(i) - 4) # drop the ".msf" extension
      write.csv(data, file = paste(WD, "/", j, "/Pre-SanXoT/", i, ".csv", sep = ""), row.names = FALSE)
    }
  }
}

# Per experiment: stack all exported CSV files, normalise FileName to end in
# ".raw", and write the result to Pre-SanXoT/Q-all.txt.
for (j in Expto) {
  files <- list.files(path = paste(WD, "/", j, "/Pre-SanXoT", sep = ""), pattern = "*.csv", full.names = TRUE)
  all_q <- do.call("rbind", lapply(files, read.csv, header = TRUE))

  # scalar settings, hence the short-circuit `||`
  if (Daemon == "TRUE" || SearchEngine == "2") {
    all_q$FileName <- paste(all_q$FileName, ".raw", sep = "")
  } else {
    # strip the last four characters of the name before appending ".raw"
    all_q$FileName <- substring(all_q$FileName, 1, (nchar(as.character(all_q$FileName)) - 4))
    all_q$FileName <- paste(all_q$FileName, ".raw", sep = "")
  }

  write.table(all_q, file = paste(WD, "/", j, "/Pre-SanXoT/Q-all.txt", sep = ""), sep = "\t", row.names = FALSE)
}

# Reshape the reporter-ion table from one row per (scan, channel) to one row
# per scan with one intensity column per tag, and write it to
# Pre-SanXoT/Q-all.xls (comma separated).
#
# NOTE(review): the printed listing of this section does not parse (fused
# statements, page numbers, and `}else{ ... } else {` sequences whose inner
# `if (TagsUsed=="ALL"){` header is missing). The structure below is
# reconstructed from the branch that is complete -- confirm against the
# original script.
#
# Changes with respect to the listing:
#  * `TagsUsed == "ALL"` is tested with identical(): TagsUsed normally holds
#    several tags, and a vector condition in `if` is an error in current R.
#  * The "X" prefix is added to TagsUsed once, before the per-experiment
#    loop, instead of once per experiment (which produced "XX113", ...).

allTagsRequested <- identical(as.character(TagsUsed), "ALL")

if (Typeoflabel == "TMT") {
  if (allTagsRequested) {
    quanColumns <- c("FirstScan", "FileName", "X126", "X127_N", "X127_C", "X128_N",
                     "X128_C", "X129_N", "X129_C", "X130_N", "X130_C", "X131")
  } else {
    TagsUsed <- paste0("X", sub("^X", "", TagsUsed))
    quanColumns <- c("FirstScan", "FileName", TagsUsed)
    # columns picked later when joining with the identifications
    colnames_TMT <- c("Raw_FirstScan", TagsUsed)
  }
} else {
  if (allTagsRequested) {
    quanColumns <- c("FirstScan", "FileName", "X113", "X114", "X115", "X116",
                     "X117", "X118", "X119", "X121")
  } else {
    TagsUsed <- paste0("X", sub("^X", "", TagsUsed))
    quanColumns <- c("FirstScan", "FileName", TagsUsed)
    # columns picked later when joining with the identifications
    colnames_iTRAQ <- c("Raw_FirstScan", TagsUsed)
  }
}

# One column of Height1 per channel in ChannelID, joined on the columns the
# per-channel tables share (FirstScan and FileName); rows with any missing
# value are dropped per channel. The result is relabelled with `columns`.
pivotChannels <- function(y, columns) {
  q_all <- data.frame()
  for (i in ChannelID) {
    z <- y[y[, "QuanChannelID"] == i, , drop = FALSE]
    a <- z[complete.cases(z), , drop = FALSE]
    channel <- a[, c("FirstScan", "Height1", "FileName")]
    colnames(channel) <- c("FirstScan", i, "FileName")
    if (nrow(q_all) == 0) {
      q_all <- channel
    } else {
      q_all <- merge(q_all, channel)
    }
  }
  colnames(q_all) <- columns
  q_all
}

if (length(Expto) < 2) {
  # single experiment: reuse the table still in memory; `j` keeps the value
  # left by the preceding loop over Expto
  y <- all_q
  q_all <- pivotChannels(y, quanColumns)
  write.table(q_all, file = paste(WD, "/", j, "/Pre-SanXoT/Q-all.xls", sep = ""), sep = ",", row.names = FALSE)
} else {
  for (j in Expto) {
    files <- list.files(path = paste(WD, "/", j, "/Pre-SanXoT", sep = ""), pattern = "Q-all.txt", full.names = TRUE)
    y <- read.table(files, header = TRUE, sep = "\t")
    q_all <- pivotChannels(y, quanColumns)
    write.table(q_all, file = paste(WD, "/", j, "/Pre-SanXoT/Q-all.xls", sep = ""), sep = ",", row.names = FALSE)
  }
}

# Per experiment: copy every "*_results*" identification table found in the
# MSF sub-folders into Pre-SanXoT (prefixed with the sub-folder name), stack
# the copies into Pre-SanXoT/ID-all.txt, then delete the copies and the
# intermediate CSV files.
for (j in Expto) {
  for (k in MSFfolders) {
    files <- list.files(path = paste(WD, "/", j, "/MSF/", k, sep = ""), pattern = "_results", full.names = TRUE)
    if (length(files) > 0) {
      # Quoting and comment handling are switched off with empty strings.
      # The listing passed the characters "¡" and "¿" for this purpose,
      # which scan() rejects in a multibyte (UTF-8) locale.
      ID_all <- read.table(files, sep = "\t", comment.char = "", quote = "", header = TRUE)
      files <- list.files(path = paste(WD, "/", j, "/MSF/", k, sep = ""), pattern = "_results")
      write.table(ID_all, file = paste(WD, "/", j, "/Pre-SanXoT/", k, files, sep = ""), sep = "\t", row.names = FALSE)
    }
  }

  files <- list.files(path = paste(WD, "/", j, "/Pre-SanXoT", sep = ""), pattern = "_results", full.names = TRUE)
  # the copies were written tab-separated just above; read them the same way
  ID_all <- do.call("rbind", lapply(files, read.table, header = TRUE, sep = "\t"))
  write.table(ID_all, file = paste(WD, "/", j, "/Pre-SanXoT/ID-all.txt", sep = ""), sep = "\t", row.names = FALSE)
  file.remove(files)

  files <- list.files(path = paste(WD, "/", j, "/Pre-SanXoT", sep = ""), pattern = "*.csv", full.names = TRUE)
  file.remove(files)
}

# Joins the identifications (ID_all, or ID-all.txt per experiment) with the
# per-tag intensities (q_all, or Q-all.xls) via merge() after adding a
# Raw_FirstScan key (file name pasted to first scan) to both tables, appends
# log2-ratio columns named "Xs_..." and companion columns named "Vs_...",
# and writes the result to Pre-SanXoT/ID-q.txt.
#
# NOTE(review): this listing does not parse as R and must be restored from
# the original script before it is run:
#  * several statements share one physical line;
#  * the assignment arrows and minus signs are typeset with U+2010, also
#    inside the "Pre‐SanXoT", "Q‐all" and "ID‐all"/"ID‐q" file names;
#  * bare "(NN)" lines are page numbers of the printed document;
#  * some string literals are split by a line wrap ("X1 30_C");
#  * several `}else{` / `} else {` sequences have no matching `if` header,
#    and the multi-experiment branch uses `i` and `l` without a visible
#    `for (i in CalcIndex)` or `l <- ...` -- lines appear to be missing.
# NOTE(review): `if (MeanTags=="ALL")` compares a vector when MeanTags holds
# more than one tag -- confirm the intended test.
if (length(Expto)<2) { k<‐q_all

x<‐ID_all

x$Raw_FirstScan<‐do.call(paste, c(x[c("RAWFile","FirstScan")], sep = "")) k$Raw_FirstScan<‐do.call(paste, c(k[c("FileName","FirstScan")], sep = "")) x$Raw_FirstScan<‐as.character(x$Raw_FirstScan)

k$Raw_FirstScan<‐as.character(k$Raw_FirstScan) if (Typeoflabel=="TMT"){

q<‐

k[,c("Raw_FirstScan","X126","X127_N","X127_C","X128_N","X128_C","X129_N","X129_C","X130_N","X1 30_C","X131")]

} else {

q<‐k[,colnames_TMT]}

}else{

q<‐k[,c("Raw_FirstScan","X113","X114","X115","X116","X117","X118","X119","X121")]

} else {

q<‐k[,colnames_iTRAQ]}}

all<‐merge(x,q)

FirstTagIndex=as.numeric(grep(paste0("X",FirstTag), colnames(all)))

CalcIndex=trunc(seq(FirstTagIndex, by=(length(ChannelID)/as.numeric(Comparatives)), len = as.numeric(Comparatives)),1)

if (MeanTags=="ALL"){

if (Typeoflabel == "TMT"){

MeanTags<‐

c("X126","X127_N","X127_C","X128_N","X128_C","X129_N","X129_C","X130_N","X130_C","X131")

(17)

} else {

MeanTags<‐c("X113","X114","X115","X116","X117","X118","X119","X121")}}

for (i in CalcIndex){

ControlIndex=as.numeric(grep(paste0("X",ControlTag), colnames(all))) if (MeanCalculation == "TRUE") {

all$Mean <‐ rowMeans(all[,paste0("X",MeanTags)]) MeanIndex=as.numeric(grep("Mean", colnames(all))) all$newcolumn <‐ log2(all[,i]/all$Mean)

l <‐ substring(colnames(all)[i],2)

colnames(all)[ncol(all)] <‐ paste0("Xs_",l,"_Mean") } else {

all$newcolumn <‐ log2(all[,i]/all[,ControlIndex]) l <‐ substring(colnames(all)[i],2)

colnames(all)[ncol(all)] <‐ paste0("Xs_",l,"_",ControlTag)}

if (Absolute == "TRUE"){

all$newcolumn <‐ all[,c(i)]

colnames(all)[ncol(all)] <‐ paste0("Vs_",l,"_ABS")}

if (Absolute == "FALSE"){

if (MeanCalculation == "TRUE"){

all$newcolumn <‐ apply(all[,c(i,MeanIndex)], 1, max) colnames(all)[ncol(all)] <‐ paste0("Vs_",l,"_Mean") } else {

all$newcolumn <‐ apply(all[,c(i,ControlIndex)], 1, max) colnames(all)[ncol(all)] <‐ paste0("Vs_",l,"_",ControlTag)}}

if (Absolute == "BOTH"){

colnames(all)[ncol(all)] <‐ paste0("Vs_",l,"_ABS") if (MeanCalculation == "TRUE"){

all$newcolumn <‐ apply(all[,c(i,ControlIndex)], 1, max) colnames(all)[ncol(all)] <‐ paste0("Vs_",l,"_",ControlTag)}}}

write.table(all, file = paste(WD,"/",j,"/Pre‐SanXoT/ID‐q.txt",sep=""), sep="\t", row.names = FALSE) } else {

for (j in Expto){

files <‐ list.files(path = paste(WD,"/",j,"/Pre‐SanXoT",sep=""),pattern="Q‐all.xls", full.names = TRUE) k<‐read.table(files, header=TRUE, sep=",")

(18)

files <‐ list.files(path = paste(WD,"/",j,"/Pre‐SanXoT",sep=""),pattern="ID‐all.txt", full.names = TRUE) x<‐read.table(files, header=TRUE, sep="\t")

x$Raw_FirstScan<‐do.call(paste, c(x[c("RAWFile","FirstScan")], sep = "")) k$Raw_FirstScan<‐do.call(paste, c(k[c("FileName","FirstScan")], sep = "")) x$Raw_FirstScan<‐as.character(x$Raw_FirstScan)

k$Raw_FirstScan<‐as.character(k$Raw_FirstScan) if (Typeoflabel=="TMT"){

q<‐

k[,c("Raw_FirstScan","X126","X127_N","X127_C","X128_N","X128_C","X129_N","X129_C","X130_N","X1 30_C","X131")]

} else {

q<‐k[,colnames_TMT]}

}else{

q<‐k[,c("Raw_FirstScan","X113","X114","X115","X116","X117","X118","X119","X121")]

} else {

q<‐k[,colnames_iTRAQ]}}

all<‐merge(x,q)

FirstTagIndex=as.numeric(grep(paste0("X",FirstTag), colnames(all)))

CalcIndex=trunc(seq(FirstTagIndex, by=(length(ChannelID)/as.numeric(Comparatives)), len = as.numeric(Comparatives)),1)

if (MeanTags=="ALL"){

MeanTags<‐

c("X126","X127_N","X127_C","X128_N","X128_C","X129_N","X129_C","X130_N","X130_C","X131") } else {

MeanTags<‐c("X113","X114","X115","X116","X117","X118","X119","X121")}}

ControlIndex=as.numeric(grep(paste0("X",ControlTag), colnames(all))) if (MeanCalculation == "TRUE") {

all$Mean <‐ rowMeans(all[,paste0("X",MeanTags)]) MeanIndex=as.numeric(grep("Mean", colnames(all))) all$newcolumn <‐ log2(all[,i]/all$Mean)

colnames(all)[ncol(all)] <‐ paste0("Xs_",l,"_Mean") } else {

all$newcolumn <‐ log2(all[,i]/all[,ControlIndex])

(19)

colnames(all)[ncol(all)] <‐ paste0("Xs_",l,"_",ControlTag)}

if (Absolute == "TRUE"){

colnames(all)[ncol(all)] <‐ paste0("Vs_",l,"_ABS")}

if (Absolute == "FALSE"){

if (MeanCalculation == "TRUE"){

all$newcolumn <‐ apply(all[,c(i,ControlIndex)], 1, max) colnames(all)[ncol(all)] <‐ paste0("Vs_",l,"_",ControlTag)}}

if (Absolute == "BOTH"){

colnames(all)[ncol(all)] <‐ paste0("Vs_",l,"_ABS") if (MeanCalculation == "TRUE"){

all$newcolumn <‐ apply(all[,c(i,ControlIndex)], 1, max) colnames(all)[ncol(all)] <‐ paste0("Vs_",l,"_",ControlTag)}}}}}

if (Random == "YES"){

for (m in CalcIndex){

all$newcolumn <‐ log2(all[,i]/all[,m]) l <‐ substring(colnames(all)[i],2) o <‐ substring(colnames(all)[m],2)

colnames(all)[ncol(all)] <‐ paste0("Xs_",l,"_",o) all$newcolumn <‐ apply(all[,c(i,m)], 1, max) colnames(all)[ncol(all)] <‐ paste0("Vs_",l,"_",o)}}}

write.table(all, file = paste(WD,"/",j,"/Pre‐SanXoT/ID‐q.txt",sep=""), sep="\t", row.names = FALSE)

##############################################################################

# Tag file Maker

##############################################################################

TagsUsed<‐c("126","127_N","127_C","128_N","128_C","129_N","129_C","130_N","130_C","131")

(20)

} else {

TagsUsed<‐c("113","114","115","116","117","118","119","121")}}

# Build the tag files used for integration.
#
# Tag_file_temp.txt lists, for every tag in TagsUsed and every experiment in
# Expto (experiment varying fastest), the label "<experiment>_<tag>" and the
# path <WD>/<experiment>/SanXoT/<tag>/data/Q2A_lowerNormV.xls.
#
# The listing grew Tag and Path with rbind() inside two nested loops, and
# the Path loop tested NROW(Tag) instead of NROW(Path); both columns are now
# built in one step from the same tag/experiment grid.
tagGrid <- expand.grid(Expto = Expto, TagsUsed = TagsUsed, stringsAsFactors = FALSE)

Tag <- data.frame(Tag = paste(tagGrid$Expto, tagGrid$TagsUsed, sep = "_"))
Path <- data.frame(
  Path = paste(WD, "/", tagGrid$Expto, "/SanXoT/", tagGrid$TagsUsed, "/data/Q2A_lowerNormV.xls", sep = "")
)

Tag_file_temp <- cbind(Tag, Path)
write.table(Tag_file_temp, file = paste0(WD, "/Integration/Tag_file_temp.txt"), sep = "\t", row.names = FALSE)

if (NOI == 1) {
  # single integration: keep the rows whose label is listed in Integration
  Tag_file <- Tag_file_temp[Tag_file_temp$Tag %in% Integration, ]
  write.table(Tag_file, file = paste0(WD, "/Integration/Tag_file.txt"), sep = "\t", row.names = FALSE)
} else {
  # several integrations: each name in Integrations is also the name of a
  # variable holding its tags; one "<name>_Tag_file.txt" is written per name
  for (j in Integrations) {
    tag <- paste(j, get(j), sep = "_")
    # NOTE(review): `tag` is rewritten on every pass of this inner loop, so
    # passes after the first strip characters from labels that no longer
    # start with `j`. Statement order is kept exactly as listed -- confirm
    # the intent against the original script.
    for (i in tag) {
      tag <- substring(tag, (nchar(j) + 2), nchar(tag))
      tag <- paste(Expto, tag, sep = "_")
      Tag_file <- Tag_file_temp[Tag_file_temp$Tag %in% tag, ]
      if (nrow(Tag_file) > 1) {
        write.table(Tag_file, file = paste0(WD, "/Integration/", j, "_Tag_file.txt"), sep = "\t", row.names = FALSE)
      }
    }
  }
}

(21)

LC4. Líneas de comandos de Windows (archivo por lotes .bat) para ejecutar análisis con SanXoT.

REM ---------------------------------------------------------------------------
REM Command templates for the SanXoT workflow.
REM Each "set" stores one complete command line in a variable. %%i and %%j are
REM stored as the literal text %i / %j, which the FOR loops of this script
REM replace when the variable is expanded inside the loop body.
REM Every definition must stay on ONE physical line. In the printed listing
REM the lines were wrapped, which detached switches from their leading dash,
REM and the dashes were typeset with a non-ASCII hyphen; both are repaired.
REM ---------------------------------------------------------------------------

set /p BaseFolder=Base Folder (without ""):

cd "C:\Carpeta\Programas\standalone exes"

REM NOTE(review): the listing shows this path without a closing quote and
REM followed by "C:"; read here as a quoted path plus a separate drive change.
set Q2CRelationFile="C:\Ubicación\Archivo\Relaciones\Proteina-Categoría.txt"
C:

set /p Data=Path of ID-q_comet:

REM --- aljamia: scan-level data and relation files (modified / unmodified) ---
set aljamiaSData_MOD=aljamia.exe -x"%Data%" -p"%BaseFolder%\%%j\SanXoT_MOD\%%i\additional_data" -o"Scans_uncalibrated_MOD.xls" -aS2P_inOUTs_uncalibrated_MOD -i"[Raw_FirstScan]-[Charge]" -j"[Xs_%%i_121]" -k"[Vs_%%i_121]" -l"PTM" -f"[Modified]== TRUE" -R1

set aljamiaSData_noMOD=aljamia.exe -x"%Data%" -p"%BaseFolder%\%%j\SanXoT_MOD\%%i\additional_data" -o"Scans_uncalibrated_noMOD.xls" -aS2P_inOUTs_uncalibrated_noMOD -i"[Raw_FirstScan]-[Charge]" -j"[Xs_%%i_121]" -k"[Vs_%%i_121]" -f"[Modified]== FALSE" -l"No_MOD" -R1

set aljamiaS2PRels_noMOD=aljamia.exe -x"%Data%" -p"%BaseFolder%\%%j\SanXoT_MOD\%%i\additional_data" -o"S2P_RelationsFile_noMOD.xls" -aS2P_RelationsFile_noMOD -i"[Sequence]" -j"[Raw_FirstScan]-[Charge]" -f"[Modified]== FALSE" -k"No_MOD" -R1

set aljamiaS2PRels_MOD=aljamia.exe -x"%Data%" -p"%BaseFolder%\%%j\SanXoT_MOD\%%i\additional_data" -o"S2P_RelationsFile_MOD.xls" -aS2P_RelationsFile_MOD -i"[Sequence]" -j"[Raw_FirstScan]-[Charge]" -k"PTM" -f"[Modified]== TRUE" -R1

set copyS2PRels=copy "%BaseFolder%\%%j\SanXoT_MOD\%%i\additional_data\S2P_RelationsFile_noMOD.xls"+"%BaseFolder%\%%j\SanXoT_MOD\%%i\additional_data\S2P_RelationsFile_MOD.xls" "%BaseFolder%\%%j\SanXoT_MOD\%%i\additional_data\S2P_RelationsFile.xls"

set aljamiaP2QRels_noMOD=aljamia.exe -x"%Data%" -p"%BaseFolder%\%%j\SanXoT_MOD\%%i\additional_data" -o"P2Q_RelationsFile_noMOD.xls" -aP2Q_RelationsFile_noMOD -i"[FASTAProteinDescription]" -j"[Sequence]" -f"[Modified]== FALSE" -k"No_MOD" -R1

set aljamiaP2QRels_MOD=aljamia.exe -x"%Data%" -p"%BaseFolder%\%%j\SanXoT_MOD\%%i\additional_data" -o"P2Q_RelationsFile_MOD.xls" -aP2Q_RelationsFile_MOD -i"[FASTAProteinDescription]" -j"[Sequence]" -k"PTM" -f"[Modified]== TRUE" -R1

set copyP2QRels=copy "%BaseFolder%\%%j\SanXoT_MOD\%%i\additional_data\P2Q_RelationsFile_noMOD.xls"+"%BaseFolder%\%%j\SanXoT_MOD\%%i\additional_data\P2Q_RelationsFile_MOD.xls" "%BaseFolder%\%%j\SanXoT_MOD\%%i\additional_data\P2Q_RelationsFile.xls"

REM --- klibrate: calibration of scan data ---
set klibrate_noMOD=klibrate.exe -d"%BaseFolder%\%%j\SanXoT_MOD\%%i\additional_data\Scans_uncalibrated_noMOD.xls" -r"%BaseFolder%\%%j\SanXoT_MOD\%%i\additional_data\S2P_RelationsFile.xls" -p"%BaseFolder%\%%j\SanXoT_MOD\%%i\data" -aS2P_inOUTs_calibrated -o"scan_noMOD.xls" -g -R2

set klibrate_MOD=klibrate.exe -d"%BaseFolder%\%%j\SanXoT_MOD\%%i\additional_data\Scans_uncalibrated_MOD.xls" -r"%BaseFolder%\%%j\SanXoT_MOD\%%i\additional_data\S2P_RelationsFile.xls" -p"%BaseFolder%\%%j\SanXoT_MOD\%%i\data" -aS2P_inOUTs_calibrated_MOD -o"scan_MOD.xls" -g -K"S2P_inOUTs_calibrated_infoFile.txt" -V"S2P_inOUTs_calibrated_infoFile.txt" -f -w20

REM --- scan to peptide, unmodified ---
set sanxotS2P_in_outs_NM=sanxot.exe -aS2P_inOuts_noMOD -p"%BaseFolder%\%%j\SanXoT_MOD\%%i\data" -d"scan_noMOD.xls" -r"%BaseFolder%\%%j\SanXoT_MOD\%%i\additional_data\S2P_RelationsFile.xls" -g

REM NOTE(review): unlike sanxotsievePQ below, the listing shows no -p/-d
REM switches for the two scan-level sanxotsieve commands and for the two
REM S2P "no_outs" commands; kept exactly as listed -- confirm.
set sanxotsieveSP_NM=sanxotsieve.exe -aS2POuts_noMOD -r"%BaseFolder%\%%j\SanXoT_MOD\%%i\additional_data\S2P_RelationsFile.xls" -f0.01 -V"S2P_inOuts_noMOD_infoFile.txt"

set sanxotS2P_no_outs_NM=sanxot.exe -aS2P_noOuts_noMOD -r"%BaseFolder%\%%j\SanXoT_MOD\%%i\data\S2POuts_noMOD_tagged.xls" -o"peptide_noMOD.xls" -g -V"S2P_inOuts_noMOD_infoFile.txt" -f --tags="!out"

REM --- scan to peptide, modified ---
set sanxotS2P_in_outs_PTM=sanxot.exe -aS2P_inOuts_MOD -p"%BaseFolder%\%%j\SanXoT_MOD\%%i\data" -d"scan_MOD.xls" -r"%BaseFolder%\%%j\SanXoT_MOD\%%i\additional_data\S2P_RelationsFile.xls" -g -f -V"S2P_inOuts_noMOD_infoFile.txt"

set sanxotsieveSP_PTM=sanxotsieve.exe -aS2POuts_MOD -r"%BaseFolder%\%%j\SanXoT_MOD\%%i\additional_data\S2P_RelationsFile.xls" -f0.01 -V"S2P_inOuts_noMOD_infoFile.txt"

set sanxotS2P_no_outs_PTM=sanxot.exe -aS2P_noOuts_MOD -r"%BaseFolder%\%%j\SanXoT_MOD\%%i\data\S2POuts_MOD_tagged.xls" -o"peptide_MOD.xls" -g -V"S2P_inOuts_noMOD_infoFile.txt" -f --tags="!out"

set copypeptide=copy "%BaseFolder%\%%j\SanXoT_MOD\%%i\data\peptide_noMOD.xls"+"%BaseFolder%\%%j\SanXoT_MOD\%%i\data\peptide_MOD.xls" "%BaseFolder%\%%j\SanXoT_MOD\%%i\data\peptide.xls"

REM --- peptide to protein ---
set sanxotP2Q_in_outs=sanxot.exe -aP2Q_inOuts -p"%BaseFolder%\%%j\SanXoT_MOD\%%i\data" -d"peptide.xls" -r"%BaseFolder%\%%j\SanXoT_MOD\%%i\additional_data\P2Q_RelationsFile.xls" -g -v0.01 --tags="!PTM"

set sanxotsievePQ=sanxotsieve.exe -aP2QOuts -p"%BaseFolder%\%%j\SanXoT_MOD\%%i\data" -d"peptide.xls" -r"%BaseFolder%\%%j\SanXoT_MOD\%%i\additional_data\P2Q_RelationsFile.xls" -f0.01 -V"P2Q_inOuts_infoFile.txt"

set sanxotP2Q_no_outs=sanxot.exe -aP2Q_noOuts -p"%BaseFolder%\%%j\SanXoT_MOD\%%i\data" -d"peptide.xls" -r"%BaseFolder%\%%j\SanXoT_MOD\%%i\data\P2QOuts_tagged.xls" -o"protein.xls" -g -f -V"P2Q_inOuts_infoFile.txt" --tags="!PTM & !out"

REM --- peptide to all, protein to category, protein/category to all ---
set sanxotP2A_in_outs=sanxot.exe -aP2A_inOuts -p"%BaseFolder%\%%j\SanXoT_MOD\%%i\data" -d"peptide.xls" -C -g

set sanxotQ2C_in_outs=sanxot.exe -aQ2C_inOuts -p"%BaseFolder%\%%j\SanXoT_MOD\%%i\data" -d"protein.xls" -r%Q2CRelationFile% -g

set sanxotsieveQC=sanxotsieve.exe -aQ2COuts -p"%BaseFolder%\%%j\SanXoT_MOD\%%i\data" -d"protein.xls" -r%Q2CRelationFile% -f0.01 -V"Q2C_inOuts_infoFile.txt"

set sanxotQ2C_no_outs=sanxot.exe -aQ2C_noOuts -p"%BaseFolder%\%%j\SanXoT_MOD\%%i\data" -d"protein.xls" -r"%BaseFolder%\%%j\SanXoT_MOD\%%i\data\Q2COuts_tagged.xls" -o"category.xls" -g -V"Q2C_inOuts_infoFile.txt" -f --tags="!out"

set sanxotQ2A=sanxot.exe -aQ2A -p"%BaseFolder%\%%j\SanXoT_MOD\%%i\data" -d"protein.xls" -C -V"Q2C_inOuts_infoFile.txt" -f -g

set sanxotC2A=sanxot.exe -aC2A -p"%BaseFolder%\%%j\SanXoT_MOD\%%i\data" -d"category.xls" -C -v0 -f -g

REM ---------------------------------------------------------------------------
REM Stage driver. For experiment iTRAQ_1 and tags 113..119 it creates the
REM "data" and "additional_data" folders, then launches one cmd.exe window per
REM tag for each stage, chaining the command templates with "&". Between
REM stages a :wait_loopN label re-checks (via goto) that a result file of the
REM previous stage exists for every tag: P2Q_RelationsFile.xls, then
REM P2QOuts_tagged.xls, then protein.xls.
REM NOTE(review): as printed, this listing is not runnable: commands are
REM wrapped across lines (e.g. "md" and its folder, "if not exist" and its
REM "goto"), bare "(NN)" lines are page numbers, and every stage after the
REM first ends in "))" although only one FOR is opened. %%j is also used
REM after the first "))" has closed the FOR %%j block. Restore the nesting
REM from the original script before running.
REM ---------------------------------------------------------------------------
for %%j in (iTRAQ_1) do (

for %%i in (113 114 115 116 117 118 119) do (

(24)

if not exist "%BaseFolder%\%%j\SanXoT_MOD\%%i\data" md

"%BaseFolder%\%%j\SanXoT_MOD\%%i\data"

if not exist "%BaseFolder%\%%j\SanXoT_MOD\%%i\additional_data" md

"%BaseFolder%\%%j\SanXoT_MOD\%%i\additional_data"

start "CALCULATION_%%j_%%i" cmd.exe /K "(%aljamiaSData_MOD% &

%aljamiaSData_noMOD% & %aljamiaS2PRels_noMOD% & %aljamiaS2PRels_MOD% & %copyS2PRels%

& %aljamiaP2QRels_noMOD% & %aljamiaP2QRels_MOD% & %copyP2QRels%)"

))

:wait_loop1

for %%i in (113 114 115 116 117 118 119) do (

if not exist "%BaseFolder%\%%j\SanXoT_MOD\%%i\additional_data\P2Q_RelationsFile.xls"

goto wait_loop1

))

for %%i in (113 114 115 116 117 118 119) do (

start "CALCULATION_%%j_%%i" cmd.exe /K "(%klibrate_noMOD% & %klibrate_MOD% &

%sanxotS2P_in_outs_NM% & %sanxotsieveSP_NM% & %sanxotS2P_no_outs_NM% &

%sanxotS2P_in_outs_PTM% & %sanxotsieveSP_PTM% & %sanxotS2P_no_outs_PTM% &

%copypeptide% & %sanxotP2Q_in_outs% & %sanxotsievePQ%)"

))

:wait_loop2

for %%i in (113 114 115 116 117 118 119) do (

if not exist "%BaseFolder%\%%j\SanXoT_MOD\%%i\data\P2QOuts_tagged.xls" goto wait_loop2

))

for %%i in (113 114 115 116 117 118 119) do (

start "CALCULATION_%%j_%%i" cmd.exe /K %sanxotP2Q_no_outs%

))

:wait_loop3

for %%i in (113 114 115 116 117 118 119) do (

if not exist "%BaseFolder%\%%j\SanXoT_MOD\%%i\data\protein.xls" goto wait_loop3

))

for %%i in (113 114 115 116 117 118 119) do (

(25)

start "CALCULATION_%%j_%%i" cmd.exe /K "(%sanxotP2A_in_outs% & %sanxotQ2C_in_outs%

& %sanxotsieveQC% & %sanxotQ2C_no_outs% & %sanxotQ2A% & %sanxotC2A%)"

))