Anexo
Líneas de comandos
Anexo: Líneas de comandos
LC1. Líneas de comandos de R para validar las identificaciones con pRatio.
### params
deltaMassThreshold <- 15 # in ppm
deltaMassAreas <- 5 # number of jumps: 1, 3 or 5

# Placeholder paths: `input` is the .msf search result that is opened as an
# SQLite database below; `output` is the tab-separated file written at the end
# of the script. Replace both with real locations before running.
input <- "C:Ubicación/Resultado/Búsqueda..msf"
output <- " C:Ubicación/Carpeta/Resultados./Búsqueda_results.txt"

library(stringi)
library(readr)
library(RSQLite)
library(plyr)
library(Peptides)

# The .msf file is read through the SQLite driver.
db <- dbConnect(SQLite(), dbname = input)
# Peptide-spectrum matches: search-engine rank 1, score named 'Xcorr' above
# 1.5, returned with file name, scan range, charge, sequence, mass, score,
# retention time, rank and delta score. The sentinel in the file-id condition
# is written with an ASCII minus sign so SQLite can parse it.
queryMain <- "select p.peptideid, fi.filename, sh.firstscan, sh.lastscan, sh.charge, p.sequence, sh.mass, ps.scorevalue, sh.retentiontime, p.searchenginerank, p.deltascore from peptides p, peptideScores ps, spectrumHeaders sh, massPeaks mp, workFlowInputFiles fi,
processingNodeScores scoreNames where p.peptideid = ps.peptideid and sh.spectrumid = p.spectrumid
and (fi.fileid = mp.fileid or mp.fileid = -1) and mp.masspeakid = sh.masspeakid and scoreNames.scoreid = ps.scoreid and scoreNames.ScoreName = 'Xcorr' and p.searchenginerank = 1
and ps.scorevalue > 1.5 order by
fi.filename desc, sh.firstscan asc, sh.lastscan asc, sh.charge asc, ps.scorevalue desc"
data <- dbGetQuery(conn = db, queryMain)
# Amino-acid modifications (id, position, name, delta mass) of every rank-1
# peptide with a score above 1.5, ordered by peptide and position.
# NOTE(review): unlike queryMain, this query does not restrict the score to
# 'Xcorr'; if a peptide carries several scores above 1.5 its rows may repeat -
# confirm against the database.
queryModifications <- "select p.peptideid,
paam.aminoacidmodificationid, paam.position,
p.sequence,
aam.modificationname, aam.deltamass from peptides p, peptideScores ps, spectrumHeaders sh,
peptidesaminoacidmodifications paam, aminoacidmodifications aam
where p.peptideid = paam.peptideid and sh.spectrumid = p.spectrumid and p.peptideid = ps.peptideid
and aam.aminoacidmodificationid = paam.aminoacidmodificationid and p.searchenginerank = 1
and ps.scorevalue > 1.5
order by p.peptideid ASC, paam.position ASC"
dataMod <- dbGetQuery(db, queryModifications)

# Protein ids and descriptions attached to the same set of peptides.
queryProteinInfo <- "select
pq.peptideid, p.sequence, pq.proteinid, q.description
from peptidesProteins pq, spectrumHeaders sh, peptides p,
peptideScores ps, proteinAnnotations q
where pq.peptideid = p.peptideid and p.peptideid = ps.peptideid and pq.proteinid = q.proteinid and sh.spectrumid = p.spectrumid and p.searchenginerank = 1 and ps.scorevalue > 1.5 order by pq.peptideid asc"
dataProt <- dbGetQuery(db, queryProteinInfo)
## prepare DATA: mods & prots

# Annotate one peptide with its modifications.
#
# x: rows of the modification table for a single peptide, with columns
#    Sequence (same value on every row), Position and DeltaMass. The code
#    treats Position as 0-based (residue index = Position + 1) and expects the
#    rows ordered by Position.
# Returns a character vector of length two: the sequence with "[deltaMass]"
# inserted after each modified residue, and the sum of the delta masses
# (coerced to character because both values share one vector).
ptmAnnotation <- function(x) {
  pep <- x$Sequence[1]
  annotated <- ""
  nextStart <- 0 # substr() treats a start of 0 like 1
  modMass <- 0
  for (modNum in seq_along(x$Position)) {
    pos <- x$Position[modNum]
    delta <- x$DeltaMass[modNum]
    # copy the residues up to and including the modified one, then the tag
    annotated <- paste0(annotated, substr(pep, nextStart, pos + 1), "[", delta, "]")
    modMass <- modMass + delta
    nextStart <- pos + 2
  }
  # remaining residues after the last modification
  annotated <- paste0(annotated, substr(pep, nextStart, nchar(pep)))
  c(annotated, modMass)
}
# One row per modified peptide: annotated sequence and summed modification mass.
dataModAnnotation <- ddply(dataMod, .(PeptideID), ptmAnnotation)
colnames(dataModAnnotation) <- c("PeptideID", "Sequence", "modMass")
dataModAnnotation$modMass <- as.numeric(dataModAnnotation$modMass)

# mix unmodified and modified
# Left join keeps peptides without modifications; for those the annotated
# sequence falls back to the plain sequence and modMass stays NA.
dataModTmp <- merge(unique(data[, c("PeptideID", "Sequence")]), dataModAnnotation,
                    by = "PeptideID", all.x = TRUE)
unmodified <- is.na(dataModTmp$Sequence.y)
dataModTmp[unmodified, "Sequence.y"] <- dataModTmp[unmodified, "Sequence.x"]
dataModAll <- dataModTmp[, c("PeptideID", "Sequence.y", "modMass")]
colnames(dataModAll) <- c("PeptideID", "SequenceMod", "modMass")

# All protein descriptions of a peptide collapsed into one string, plus the
# first description per peptide as the representative one.
redundances <- aggregate(Description ~ PeptideID, data = dataProt, paste, collapse = " -- ")
colnames(redundances) <- c("PeptideID", "Redundances")
dataProt.u <- dataProt[!duplicated(dataProt["PeptideID"]), ]
peptideProt <- merge(dataProt.u[, c("PeptideID", "Description")], redundances,
                     by = "PeptideID", all.x = TRUE)
#*****
# Join the spectrum table with the modification and protein tables on
# PeptideID. `data` is ordered by file/scan while the other two tables come
# out of merge() ordered by PeptideID, so binding them by row position would
# pair rows of different peptides (or fail when row counts differ).
dataAll <- merge(data, dataModAll, by = "PeptideID")
dataAll <- merge(dataAll, peptideProt, by = "PeptideID", all.x = TRUE)
colnames(dataAll) <-
  c("PeptideID", "FileName", "FirstScan", "LastScan", "Charge", "Sequence", "Mass",
    "ScoreValue", "RetentionTime", "SearchEngineRank", "DeltaScore", "SequenceMod",
    "modMass", "Description", "Redundances")
## Calculate theoretical mass
# Peptides without modifications carry NA; treat them as zero added mass.
# Column-wise assignment also works when no row is NA.
dataAll$modMass[is.na(dataAll$modMass)] <- 0

# Monoisotopic mass of the bare sequence (Peptides::mw) plus 1.00727647.
# NOTE(review): 1.00727647 looks like the proton mass and 229.162932 like the
# mass of an isobaric tag added once per peptide - confirm with the method.
dataAll$Theoretical <- unlist(lapply(dataAll[, c("Sequence")], mw, monoisotopic = TRUE))
dataAll$Theoretical <- dataAll$Theoretical + 1.00727647
dataAll$TheoreticalModTag <- dataAll$Theoretical + dataAll$modMass + 229.162932

# Absolute deviation of the observed mass from the theoretical one, in ppm of
# the observed mass.
dataAll$deltaMassTargetppm <-
  abs(dataAll$Mass - dataAll$Theoretical - dataAll$modMass - 229.162932) / dataAll$Mass * 1e6
## Decoy tagging
# A protein description containing `decoy_tag` marks the match as a decoy;
# every other row (including rows without description) counts as a target.
decoy_tag <- "_INV_"
protein <- dataAll[, "Description"]
isDecoy <- as.numeric(grepl(decoy_tag, protein, fixed = TRUE))
isTarget <- 1 - isDecoy
dataAll <- cbind(dataAll, isDecoy, isTarget)
## filter by deltaMass
# Penalise a match whose precursor mass is too far from the theoretical mass.
#
# x: vector with, in order, the theoretical mass (modifications and tag
#    included), the observed mass and the score.
# deltaMassThreshold: maximum accepted deviation, in ppm of the theoretical mass.
# deltaMassAreas: 1 tests only the observed mass; 3 also tests the observed
#    mass minus one isotope spacing (1.0033); 5 additionally tests it minus
#    two spacings.
# Returns the original score when any tested mass falls under the threshold,
# otherwise 0.01.
#
# The third window uses two isotope spacings; testing a single spacing again
# would make the 5-area setting identical to the 3-area one.
# NOTE(review): only downward corrections (observed mass heavier than the
# theoretical one) are tested - confirm that upward shifts are not expected.
filterDeltaMass <- function(x, deltaMassThreshold, deltaMassAreas) {
  TheoreticalModTag <- x[1]
  Mass <- x[2]
  ScoreValue <- x[3]
  isotopeSpacing <- 1.0033
  maxShift <- if (deltaMassAreas <= 1) 0 else if (deltaMassAreas <= 3) 1 else 2
  for (shift in 0:maxShift) {
    MassCorr <- Mass - shift * isotopeSpacing
    jump_ppm <- abs(TheoreticalModTag - MassCorr) / TheoreticalModTag * 1e6
    if (jump_ppm < deltaMassThreshold) {
      return(ScoreValue)
    }
  }
  0.01
}
# Score of every match after the delta-mass filter. The result is kept in its
# own table; the assignment back into dataAll is left disabled below.
jump1ScoreValue <- as.data.frame(unlist(apply(
  dataAll[, c("TheoreticalModTag", "Mass", "ScoreValue")], 1, filterDeltaMass,
  deltaMassThreshold = deltaMassThreshold, deltaMassAreas = deltaMassAreas
)))
colnames(jump1ScoreValue) <- "ScoreValueAfterJUMP"
#dataAll$ScoreValue<-jump1ScoreValue$ScoreValueAfterJUMP
## Add xcorr_c n = dim(dataAll)[1]
# Corrected score for one match.
#
# x: vector with, in order, charge, score and peptide sequence (elements may
#    be character; they are converted here).
# Returns log(score / r) / log(2 * sequence length), with r = 1.22 for
# charges above 2 and r = 1 otherwise.
xcorr_c <- function(x) {
  charge <- as.numeric(x[1])
  score <- as.numeric(x[2])
  peptide <- as.character(x[3])
  r <- if (charge > 2) 1.22 else 1
  log(score / r) / log(2 * nchar(peptide))
}
# Corrected score per row. The rows are passed one by one instead of through
# apply(): apply() converts the mixed numeric/character columns to a character
# matrix via format(), which rounds the scores to 7 significant digits.
dataAll$xcorr_c <- vapply(
  seq_len(nrow(dataAll)),
  function(rowIdx) {
    xcorr_c(c(dataAll$Charge[rowIdx], dataAll$ScoreValue[rowIdx],
              as.character(dataAll$Sequence[rowIdx])))
  },
  numeric(1)
)
# sort by xcorr_c
#dataAll <‐ dataAll[order(decreasing = TRUE,dataAll$xcorr_c),]
##dataAll <‐ dataAll[order(decreasing = TRUE,dataAll$ScoreValue),]
#tmp <‐ cbind(dataAll[, "xcorr_c"], dataAll[, "isDecoy"])
##tmp <‐ cbind(dataAll[, "ScoreValue"], dataAll[, "isDecoy"])
#FP <‐ cumsum(tmp[, 2])
#tmp <‐ cbind(tmp, FP)
#xcorr_cP <‐ unlist(lapply(1:n, function(x) (tmp[x, 'FP'])/n))
#dataAll <‐ cbind(dataAll, xcorr_cP)
### FDR ScoreValue
# Rows sorted by decreasing score; FP/TP are the running counts of decoys and
# targets, xcorr_FDR their ratio and xcorr_FDRa the running maximum of that
# ratio. cummax() replaces a per-row max over all previous rows.
dataAll <- dataAll[order(dataAll$ScoreValue, decreasing = TRUE), ]
tmp <- cbind(dataAll[, "ScoreValue"], dataAll[, "isDecoy"], dataAll[, "isTarget"])
FP <- cumsum(tmp[, 2])
TP <- cumsum(tmp[, 3])
tmp <- cbind(tmp, FP, TP)
xcorr_FDR <- FP / TP
dataAll <- cbind(dataAll, tmp, xcorr_FDR)
xcorr_FDRa <- cummax(xcorr_FDR)
dataAll <- cbind(dataAll, xcorr_FDRa)

### FDR CALC
# Same calculation with the rows sorted by the corrected score xcorr_c.
dataAll <- dataAll[order(dataAll$xcorr_c, decreasing = TRUE), ]
tmp <- cbind(dataAll[, "xcorr_c"], dataAll[, "isDecoy"], dataAll[, "isTarget"])
FP <- cumsum(tmp[, 2])
TP <- cumsum(tmp[, 3])
tmp <- cbind(tmp, FP, TP)
xcorr_c_FDR <- FP / TP
dataAll <- cbind(dataAll, tmp, xcorr_c_FDR)
xcorr_c_FDRa <- cummax(xcorr_c_FDR)
dataAll <- cbind(dataAll, xcorr_c_FDRa)
# Target matches under 1 % FDR on the corrected score.
# NOTE(review): the filter uses the raw ratio xcorr_c_FDR, not the running
# maximum xcorr_c_FDRa - confirm this is intended.
res <- dataAll[dataAll$xcorr_c_FDR < 0.01 & dataAll$isTarget == 1, ]
#res <- dataAll[dataAll$xcorr_c_FDR < 0.01,]

# File name = last backslash-separated component of the first FileName entry.
rawPathParts <- strsplit(data[1, "FileName"], fixed = TRUE, split = "\\")[[1]]
fileName <- rawPathParts[length(rawPathParts)]

# Columns that this script does not compute are filled with the text "NA".
pRatio <- "NA"; pI <- "NA"; Xcorr1Original <- "NA"; Xcorr2Search <- "NA"; Sp <- "NA"; SpRank <- "NA"
ProteinsWithPeptide <- "NA"

# Scalars are repeated explicitly so that an empty `res` yields an empty table
# (header only) instead of an error.
nRes <- nrow(res)
resPratio <- data.frame(
  FileName = rep(fileName, nRes),
  RAWFile = rep(fileName, nRes),
  FirstScan = res$FirstScan,
  LastScan = res$LastScan,
  Charge = res$Charge,
  pRatio = rep(pRatio, nRes),
  FDR = res$xcorr_c_FDR,
  FASTAProteinDescription = res$Description,
  Sequence = res$SequenceMod,
  pI = rep(pI, nRes),
  PrecursorMass = res$Mass,
  Xcorr1Search = res$xcorr_c,
  Xcorr1Original = rep(Xcorr1Original, nRes),
  Xcorr2Search = rep(Xcorr2Search, nRes),
  DeltaCn = res$DeltaScore,
  Sp = rep(Sp, nRes),
  SpRank = rep(SpRank, nRes),
  ProteinsWithPeptide = rep(ProteinsWithPeptide, nRes),
  Redundances = res$Redundances,
  stringsAsFactors = FALSE
)

#SIMPLYFIED
resPratio <-
  resPratio[, c("FileName", "RAWFile", "FirstScan", "LastScan", "Charge", "Sequence",
                "FASTAProteinDescription", "Xcorr1Search", "FDR", "Redundances")]

# pRatio modification parsing
# Each bracketed delta mass is replaced by a one-character symbol.
modSymbols <- c(
  "[57.021464]" = "*",
  "[125.047679]" = "_",
  "[15.994915]" = "#",
  "[229.162932]" = "@",
  "[113.08407]" = "^",
  "[304.20536]" = "{"
)
for (modTag in names(modSymbols)) {
  resPratio$Sequence <- gsub(modTag, modSymbols[[modTag]], resPratio$Sequence, fixed = TRUE)
}

write.table(resPratio, file = output, col.names = TRUE, row.names = FALSE, sep = "\t", quote = FALSE)
#}
#write.table(res,file = output,col.names = TRUE, row.names = FALSE,sep="\t", quote = FALSE)
LC2. Archivo Config.txt previo a la ejecución del Pre-SanXoT.
##############################################################################
# Params to Pre-SanXoT
##############################################################################
# Write the name of the Experiments Name to be analyzed
Expto <- c("iTRAQ_1", "iTRAQ_2")
# Pattern of folders that contains the MSFs
# (the variable is spelled `Patern` because the Pre-SanXoT script reads it
# under that name)
Patern <- c("FR_*")
# Channels used in the Experiments
ChannelID <- c(1:8)
# Type of label used
Typeoflabel <- c("iTRAQ")
# Tags Used in the Experiment (All is "ALL")
TagsUsed <- c("113", "114", "115", "116", "117", "118", "119", "121")
# Control Tag
ControlTag <- c("121")
# Mean Tag Calculation
MeanCalculation <- c("FALSE")
# Mean Tags
MeanTags <- c("126", "131")
# First Tag
FirstTag <- c("113")
# Search Engine
SearchEngine <- c("2")
# Daemon used (TRUE or FALSE)
Daemon <- c("TRUE")
# Number of comparatives within the Experiment
Comparatives <- c("8")
# To Absolute Quantification (TRUE = Absolute Quantification, FALSE = Relative Quantification or BOTH = Both)
Absolute <- c("BOTH")
# Calculate all against all tags
Random <- c("YES")
##############################################################################
# Params to Tag File Maker
##############################################################################
# When you have only ONE integration Samples to Integrate (Expto_Tag)
Integration <- c("SPIROS_128_N", "SPIROS_128_C", "SPIROS_129_N")
# Number of Integrations
NOI <- c("3")
# Integration Names and Tags Used
# Each name listed in `Integrations` must also exist as a variable holding the
# tags of that integration; the Tag file maker looks it up by name.
Control <- c("126", "127_N", "127_C")
CR2 <- c("128_N", "128_C", "129_N")
CR7 <- c("129_C", "130_N", "130_C")
Integrations <- c("Control", "CR2", "CR7")
##############################################################################
LC3. Líneas de comandos de R para ejecutar el Pre‐SanXoT.
# The working directory is the base folder of the analysis; Config.txt is
# expected directly inside it and is sourced as R code.
(WD <- getwd())
if (!is.null(WD)) setwd(WD)
source(paste0(WD, "/Config.txt"))
##############################################################################
# Pre‐SanXoT
##############################################################################
# List the immediate sub-directories of `path` (this definition masks
# base::list.dirs for the rest of the script).
#
# path: one or more directories to look into.
# pattern: optional regular expression the directory names must match.
# all.dirs: passed to list.files() as `all.files` (include hidden entries).
# full.names: TRUE returns full paths, anything else returns bare names.
# ignore.case: passed to list.files().
# Returns a character vector; empty when nothing matches.
list.dirs <- function(path = ".", pattern = NULL, all.dirs = FALSE, full.names = FALSE, ignore.case = FALSE) {
  # full names are always requested so the entries can be tested on disk
  entries <- list.files(path, pattern = pattern, all.files = all.dirs,
                        full.names = TRUE, recursive = FALSE, ignore.case = ignore.case)
  # dir.exists() returns FALSE (never NA) for entries that cannot be inspected
  dirs <- entries[dir.exists(entries)]
  # determine whether to return full names or just dir names
  if (isTRUE(full.names)) {
    return(dirs)
  }
  basename(dirs)
}
# Names of the MSF sub-folders (matching `Patern`) found in any experiment.
# Duplicated names across experiments are collapsed so no folder is processed
# twice.
MSFfolders <- unique(list.dirs(path = paste0(WD, "/", Expto, "/MSF"), pattern = Patern))
library("RSQLite")

# Reporter-ion heights per spectrum. Two variants exist because the file-name
# column comes from different tables: [Workflows] when SearchEngine is "2",
# [WorkflowInfo] otherwise.
queryWorkflows <-
"SELECT [SpectrumHeaders].[FirstScan], [ReporterIonQuanResults].[Mass] AS [Mass2], [ReporterIonQuanResults].[Height] AS [Height1], [SpectrumHeaders].[RetentionTime],
[ReporterIonQuanResults].[QuanChannelID], [MassPeaks].[MassPeakID],
[Workflows].[WorkflowName] AS [FileName]
FROM [ReporterIonQuanResults]
INNER JOIN [SpectrumHeaders] ON [ReporterIonQuanResults].[SpectrumID] =
[SpectrumHeaders].[SpectrumID]
INNER JOIN [MassPeaks] ON [MassPeaks].[MassPeakID] = [SpectrumHeaders].[MassPeakID]
INNER JOIN [WorkflowInputFiles] ON [MassPeaks].[FileID] = [WorkflowInputFiles].[FileID]
INNER JOIN [Workflows] ON [WorkflowInputFiles].[WorkflowID] = [Workflows].[WorkflowID]
WHERE [ReporterIonQuanResults].[Mass] > 0"
queryWorkflowInfo <-
"SELECT [SpectrumHeaders].[FirstScan], [ReporterIonQuanResults].[Mass] AS [Mass2], [ReporterIonQuanResults].[Height] AS [Height1], [SpectrumHeaders].[RetentionTime],
[ReporterIonQuanResults].[QuanChannelID], [MassPeaks].[MassPeakID],
[WorkflowInfo].[WorkflowName] AS [FileName]
FROM [ReporterIonQuanResults]
INNER JOIN [SpectrumHeaders] ON [ReporterIonQuanResults].[SpectrumID] = [SpectrumHeaders].[SpectrumID]
INNER JOIN [MassPeaks] ON [MassPeaks].[MassPeakID] = [SpectrumHeaders].[MassPeakID]
INNER JOIN [FileInfos] ON [MassPeaks].[FileID] = [FileInfos].[FileID], [WorkflowInfo]
WHERE [ReporterIonQuanResults].[Mass] > 0"
reporterQuery <- if (SearchEngine == "2") queryWorkflows else queryWorkflowInfo

# Export one CSV per .msf file into <experiment>/Pre-SanXoT.
for (j in Expto) {
  preDir <- paste(WD, "/", j, "/Pre-SanXoT", sep = "")
  dir.create(preDir, showWarnings = FALSE) # no-op when the folder already exists
  for (k in MSFfolders) {
    msfDir <- paste(WD, "/", j, "/MSF/", k, sep = "")
    # anchored regex: only names that really end in ".msf"
    files <- list.files(path = msfDir, pattern = "\\.msf$")
    for (i in files) {
      db <- dbConnect(SQLite(), dbname = paste(msfDir, "/", i, sep = ""))
      # always release the connection, even when the query fails
      data <- tryCatch(dbGetQuery(conn = db, reporterQuery), finally = dbDisconnect(db))
      i <- substr(i, 1, nchar(i) - 4) # drop the ".msf" extension
      write.csv(data, file = paste(preDir, "/", i, ".csv", sep = ""), row.names = FALSE)
    }
  }
}
# Per experiment: stack all exported CSVs, normalise FileName so it ends in
# ".raw", and save the result as Q-all.txt.
for (j in Expto) {
  preDir <- paste(WD, "/", j, "/Pre-SanXoT", sep = "")
  files <- list.files(path = preDir, pattern = "\\.csv$", full.names = TRUE)
  if (length(files) == 0) {
    stop("No reporter-ion CSV files found in ", preDir, call. = FALSE)
  }
  all_q <- do.call("rbind", lapply(files, read.csv, header = TRUE))
  if (Daemon == "TRUE" || SearchEngine == "2") {
    all_q$FileName <- paste(all_q$FileName, ".raw", sep = "")
  } else {
    # here the stored name carries a 4-character suffix that is replaced
    all_q$FileName <- substring(all_q$FileName, 1, (nchar(as.character(all_q$FileName)) - 4))
    all_q$FileName <- paste(all_q$FileName, ".raw", sep = "")
  }
  write.table(all_q, file = paste(preDir, "/Q-all.txt", sep = ""), sep = "\t", row.names = FALSE)
}
# Reshape the reporter-ion table of every experiment from one row per
# (scan, channel) to one row per scan with one intensity column per tag, and
# save it as Q-all.xls (comma separated despite the extension).
#
# The tag column names are computed once, outside the experiment loop, and
# `TagsUsed` itself is left untouched: prefixing it in place would add another
# "X" for every experiment and leak prefixed tags into later steps.
usesAllTags <- identical(as.character(TagsUsed), "ALL")
if (Typeoflabel == "TMT") {
  tagNames <- if (usesAllTags) {
    c("126", "127_N", "127_C", "128_N", "128_C", "129_N", "129_C", "130_N", "130_C", "131")
  } else {
    TagsUsed
  }
  tagCols <- paste0("X", tagNames)
  if (!usesAllTags) colnames_TMT <- c("Raw_FirstScan", tagCols)
} else {
  tagNames <- if (usesAllTags) {
    c("113", "114", "115", "116", "117", "118", "119", "121")
  } else {
    TagsUsed
  }
  tagCols <- paste0("X", tagNames)
  if (!usesAllTags) colnames_iTRAQ <- c("Raw_FirstScan", tagCols)
}
if (length(tagCols) != length(ChannelID)) {
  stop("The number of tags (", length(tagCols), ") does not match the number of channels (",
       length(ChannelID), ")", call. = FALSE)
}

for (j in Expto) {
  preDir <- paste(WD, "/", j, "/Pre-SanXoT", sep = "")
  # with a single experiment the table is still in memory; otherwise reload it
  y <- if (length(Expto) < 2) {
    all_q
  } else {
    read.table(paste(preDir, "/Q-all.txt", sep = ""), header = TRUE, sep = "\t")
  }

  # one three-column table per channel: scan, file, intensity of that channel
  perChannel <- lapply(ChannelID, function(channel) {
    rows <- y[which(y$QuanChannelID == channel), , drop = FALSE]
    rows <- rows[complete.cases(rows), , drop = FALSE] # drop rows holding NA/NaN
    out <- rows[, c("FirstScan", "FileName", "Height1")]
    colnames(out)[3] <- as.character(channel)
    out
  })
  # keep only scans quantified in every channel
  q_all <- Reduce(function(left, right) merge(left, right, by = c("FirstScan", "FileName")),
                  perChannel)
  colnames(q_all) <- c("FirstScan", "FileName", tagCols)

  write.table(q_all, file = paste(preDir, "/Q-all.xls", sep = ""), sep = ",", row.names = FALSE)
}
# Per experiment: collect the identification tables ("*_results*" files) of
# every MSF folder into ID-all.txt, then remove the temporary copies and the
# per-file CSVs from the Pre-SanXoT folder.
for (j in Expto) {
  preDir <- paste(WD, "/", j, "/Pre-SanXoT", sep = "")
  for (k in MSFfolders) {
    msfDir <- paste(WD, "/", j, "/MSF/", k, sep = "")
    resultNames <- list.files(path = msfDir, pattern = "_results")
    # a folder may hold several result files; each one is copied separately
    for (resultName in resultNames) {
      # quoting and comment handling are switched off: the sequences contain
      # symbols such as '#'
      ID_all <- read.table(paste(msfDir, "/", resultName, sep = ""), sep = "\t",
                           comment.char = "", quote = "", header = TRUE)
      write.table(ID_all, file = paste(preDir, "/", k, resultName, sep = ""),
                  sep = "\t", row.names = FALSE)
    }
  }
  files <- list.files(path = preDir, pattern = "_results", full.names = TRUE)
  ID_all <- do.call("rbind", lapply(files, read.table, header = TRUE))
  write.table(ID_all, file = paste(preDir, "/ID-all.txt", sep = ""), sep = "\t", row.names = FALSE)
  file.remove(files)
  files <- list.files(path = preDir, pattern = "\\.csv$", full.names = TRUE)
  file.remove(files)
}
# Per experiment: join identifications (ID-all) with reporter intensities
# (Q-all) on file name + first scan, add the log2-ratio (Xs_*) and weight
# (Vs_*) columns, and write ID-q.txt. The file is written inside the loop so
# every experiment gets its own output.

# Append `values` as the last column of `df` under `name` (duplicate names are
# allowed, so an existing column is never overwritten).
appendColumn <- function(df, values, name) {
  df[[ncol(df) + 1]] <- values
  colnames(df)[ncol(df)] <- name
  df
}

# Columns averaged when MeanCalculation is "TRUE".
meanTagNames <- if (identical(as.character(MeanTags), "ALL")) {
  if (Typeoflabel == "TMT") {
    c("126", "127_N", "127_C", "128_N", "128_C", "129_N", "129_C", "130_N", "130_C", "131")
  } else {
    c("113", "114", "115", "116", "117", "118", "119", "121")
  }
} else {
  MeanTags
}
meanCols <- paste0("X", meanTagNames)
useMean <- MeanCalculation == "TRUE"

for (j in Expto) {
  preDir <- paste(WD, "/", j, "/Pre-SanXoT", sep = "")
  if (length(Expto) < 2) {
    k <- q_all
    x <- ID_all
  } else {
    k <- read.table(paste(preDir, "/Q-all.xls", sep = ""), header = TRUE, sep = ",")
    x <- read.table(paste(preDir, "/ID-all.txt", sep = ""), header = TRUE, sep = "\t")
  }

  # join key: raw file name immediately followed by the first scan number
  x$Raw_FirstScan <- paste0(x$RAWFile, x$FirstScan)
  k$Raw_FirstScan <- paste0(k$FileName, k$FirstScan)
  # every column of the intensity table except the key parts is a tag column
  tagCols <- setdiff(colnames(k), c("FirstScan", "FileName", "Raw_FirstScan"))
  q <- k[, c("Raw_FirstScan", tagCols)]
  all <- merge(x, q)

  # exact name matches: a pattern search could hit several columns
  FirstTagIndex <- match(paste0("X", FirstTag), colnames(all))
  if (is.na(FirstTagIndex)) {
    stop("Column X", FirstTag, " not found for experiment ", j, call. = FALSE)
  }
  CalcIndex <- trunc(seq(FirstTagIndex,
                         by = (length(ChannelID) / as.numeric(Comparatives)),
                         length.out = as.numeric(Comparatives)))

  # reference intensity: row mean of the mean tags, or the control channel
  if (useMean) {
    all$Mean <- rowMeans(all[, meanCols])
    reference <- all$Mean
    referenceLabel <- "Mean"
  } else {
    ControlIndex <- match(paste0("X", ControlTag), colnames(all))
    if (is.na(ControlIndex)) {
      stop("Column X", ControlTag, " not found for experiment ", j, call. = FALSE)
    }
    reference <- all[[ControlIndex]]
    referenceLabel <- ControlTag
  }

  for (i in CalcIndex) {
    l <- substring(colnames(all)[i], 2) # tag name without the leading "X"
    all <- appendColumn(all, log2(all[[i]] / reference), paste0("Xs_", l, "_", referenceLabel))
    if (Absolute == "TRUE" || Absolute == "BOTH") {
      # absolute weight: the channel intensity itself
      all <- appendColumn(all, all[[i]], paste0("Vs_", l, "_ABS"))
    }
    if (Absolute == "FALSE" || Absolute == "BOTH") {
      # relative weight: the larger of channel and reference intensity
      all <- appendColumn(all, pmax(all[[i]], reference), paste0("Vs_", l, "_", referenceLabel))
    }
  }

  # optional all-against-all comparisons between the calculated channels
  if (Random == "YES") {
    for (i in CalcIndex) {
      for (m in CalcIndex) {
        l <- substring(colnames(all)[i], 2)
        o <- substring(colnames(all)[m], 2)
        all <- appendColumn(all, log2(all[[i]] / all[[m]]), paste0("Xs_", l, "_", o))
        all <- appendColumn(all, pmax(all[[i]], all[[m]]), paste0("Vs_", l, "_", o))
      }
    }
  }

  write.table(all, file = paste(preDir, "/ID-q.txt", sep = ""), sep = "\t", row.names = FALSE)
}
##############################################################################
# Tag file Maker
##############################################################################
# Build the tag files: one row per (tag, experiment) with the label
# "<experiment>_<tag>" and the path of that tag's Q2A_lowerNormV.xls.
tagNames <- if (identical(as.character(TagsUsed), "ALL")) {
  if (Typeoflabel == "TMT") {
    c("126", "127_N", "127_C", "128_N", "128_C", "129_N", "129_C", "130_N", "130_C", "131")
  } else {
    c("113", "114", "115", "116", "117", "118", "119", "121")
  }
} else {
  # tolerate tags that an earlier step already prefixed with "X"
  sub("^X+", "", TagsUsed)
}

# experiments vary fastest, tags slowest
tagGrid <- expand.grid(Expto = Expto, Tag = tagNames, stringsAsFactors = FALSE)
Tag_file_temp <- data.frame(
  Tag = paste(tagGrid$Expto, tagGrid$Tag, sep = "_"),
  Path = paste(WD, "/", tagGrid$Expto, "/SanXoT/", tagGrid$Tag, "/data/Q2A_lowerNormV.xls", sep = ""),
  stringsAsFactors = FALSE
)

integrationDir <- paste0(WD, "/Integration")
dir.create(integrationDir, showWarnings = FALSE) # no-op when it already exists
write.table(Tag_file_temp, file = paste0(integrationDir, "/Tag_file_temp.txt"), sep = "\t", row.names = FALSE)

if (NOI == 1) {
  # single integration: keep the labels listed in `Integration`
  Tag_file <- Tag_file_temp[Tag_file_temp$Tag %in% Integration, ]
  write.table(Tag_file, file = paste0(integrationDir, "/Tag_file.txt"), sep = "\t", row.names = FALSE)
} else {
  # several integrations: each name in `Integrations` is also the name of a
  # variable holding its tags; every experiment/tag combination is selected
  for (j in Integrations) {
    wanted <- as.vector(outer(Expto, get(j), paste, sep = "_"))
    Tag_file <- Tag_file_temp[Tag_file_temp$Tag %in% wanted, ]
    # a file is only written when more than one row matches
    if (nrow(Tag_file) > 1) {
      write.table(Tag_file, file = paste0(integrationDir, "/", j, "_Tag_file.txt"), sep = "\t", row.names = FALSE)
    }
  }
}
LC4. Líneas de comandos de C para ejecutar análisis con SanXoT.
rem Ask for the base folder, move to the folder holding the executables and
rem store the protein-to-category relations file (placeholder path).
set /p BaseFolder=Base Folder (without ""):
cd "C:\Carpeta\Programas\standalone exes"
set Q2CRelationFile="C:\Ubicación\Archivo\Relaciones\Proteina-Categoría.txt"
C:
set /p Data=Path of ID-q_comet:

rem Every command below is stored as text in a variable. %%j (experiment) and
rem %%i (tag) are kept as FOR placeholders and are only resolved when the
rem variable is expanded inside the FOR loops that run the commands.
set "DataDir=%BaseFolder%\%%j\SanXoT_MOD\%%i\data"
set "AddDir=%BaseFolder%\%%j\SanXoT_MOD\%%i\additional_data"

rem --- aljamia: scan tables and relation files, modified / non-modified ---
set aljamiaSData_MOD=aljamia.exe -x"%Data%" -p"%AddDir%" -o"Scans_uncalibrated_MOD.xls" -aS2P_inOUTs_uncalibrated_MOD -i"[Raw_FirstScan]-[Charge]" -j"[Xs_%%i_121]" -k"[Vs_%%i_121]" -l"PTM" -f"[Modified]== TRUE" -R1
set aljamiaSData_noMOD=aljamia.exe -x"%Data%" -p"%AddDir%" -o"Scans_uncalibrated_noMOD.xls" -aS2P_inOUTs_uncalibrated_noMOD -i"[Raw_FirstScan]-[Charge]" -j"[Xs_%%i_121]" -k"[Vs_%%i_121]" -f"[Modified]== FALSE" -l"No_MOD" -R1
set aljamiaS2PRels_noMOD=aljamia.exe -x"%Data%" -p"%AddDir%" -o"S2P_RelationsFile_noMOD.xls" -aS2P_RelationsFile_noMOD -i"[Sequence]" -j"[Raw_FirstScan]-[Charge]" -f"[Modified]== FALSE" -k"No_MOD" -R1
set aljamiaS2PRels_MOD=aljamia.exe -x"%Data%" -p"%AddDir%" -o"S2P_RelationsFile_MOD.xls" -aS2P_RelationsFile_MOD -i"[Sequence]" -j"[Raw_FirstScan]-[Charge]" -k"PTM" -f"[Modified]== TRUE" -R1
set copyS2PRels=copy "%AddDir%\S2P_RelationsFile_noMOD.xls"+"%AddDir%\S2P_RelationsFile_MOD.xls" "%AddDir%\S2P_RelationsFile.xls"
set aljamiaP2QRels_noMOD=aljamia.exe -x"%Data%" -p"%AddDir%" -o"P2Q_RelationsFile_noMOD.xls" -aP2Q_RelationsFile_noMOD -i"[FASTAProteinDescription]" -j"[Sequence]" -f"[Modified]== FALSE" -k"No_MOD" -R1
set aljamiaP2QRels_MOD=aljamia.exe -x"%Data%" -p"%AddDir%" -o"P2Q_RelationsFile_MOD.xls" -aP2Q_RelationsFile_MOD -i"[FASTAProteinDescription]" -j"[Sequence]" -k"PTM" -f"[Modified]== TRUE" -R1
set copyP2QRels=copy "%AddDir%\P2Q_RelationsFile_noMOD.xls"+"%AddDir%\P2Q_RelationsFile_MOD.xls" "%AddDir%\P2Q_RelationsFile.xls"

rem --- klibrate: calibration of the scan tables ---
set klibrate_noMOD=klibrate.exe -d"%AddDir%\Scans_uncalibrated_noMOD.xls" -r"%AddDir%\S2P_RelationsFile.xls" -p"%DataDir%" -aS2P_inOUTs_calibrated -o"scan_noMOD.xls" -g -R2
set klibrate_MOD=klibrate.exe -d"%AddDir%\Scans_uncalibrated_MOD.xls" -r"%AddDir%\S2P_RelationsFile.xls" -p"%DataDir%" -aS2P_inOUTs_calibrated_MOD -o"scan_MOD.xls" -g -K"S2P_inOUTs_calibrated_infoFile.txt" -V"S2P_inOUTs_calibrated_infoFile.txt" -f -w20

rem --- scan to peptide, non-modified ---
set sanxotS2P_in_outs_NM=sanxot.exe -aS2P_inOuts_noMOD -p"%DataDir%" -d"scan_noMOD.xls" -r"%AddDir%\S2P_RelationsFile.xls" -g
set sanxotsieveSP_NM=sanxotsieve.exe -aS2POuts_noMOD -p"%DataDir%" -d"scan_noMOD.xls" -r"%AddDir%\S2P_RelationsFile.xls" -f0.01 -V"S2P_inOuts_noMOD_infoFile.txt"
set sanxotS2P_no_outs_NM=sanxot.exe -aS2P_noOuts_noMOD -p"%DataDir%" -d"scan_noMOD.xls" -r"%DataDir%\S2POuts_noMOD_tagged.xls" -o"peptide_noMOD.xls" -g -V"S2P_inOuts_noMOD_infoFile.txt" -f --tags="!out"

rem --- scan to peptide, modified ---
set sanxotS2P_in_outs_PTM=sanxot.exe -aS2P_inOuts_MOD -p"%DataDir%" -d"scan_MOD.xls" -r"%AddDir%\S2P_RelationsFile.xls" -g -f -V"S2P_inOuts_noMOD_infoFile.txt"
set sanxotsieveSP_PTM=sanxotsieve.exe -aS2POuts_MOD -p"%DataDir%" -d"scan_MOD.xls" -r"%AddDir%\S2P_RelationsFile.xls" -f0.01 -V"S2P_inOuts_noMOD_infoFile.txt"
set sanxotS2P_no_outs_PTM=sanxot.exe -aS2P_noOuts_MOD -p"%DataDir%" -d"scan_MOD.xls" -r"%DataDir%\S2POuts_MOD_tagged.xls" -o"peptide_MOD.xls" -g -V"S2P_inOuts_noMOD_infoFile.txt" -f --tags="!out"
set copypeptide=copy "%DataDir%\peptide_noMOD.xls"+"%DataDir%\peptide_MOD.xls" "%DataDir%\peptide.xls"

rem --- peptide to protein ---
set sanxotP2Q_in_outs=sanxot.exe -aP2Q_inOuts -p"%DataDir%" -d"peptide.xls" -r"%AddDir%\P2Q_RelationsFile.xls" -g -v0.01 --tags="!PTM"
set sanxotsievePQ=sanxotsieve.exe -aP2QOuts -p"%DataDir%" -d"peptide.xls" -r"%AddDir%\P2Q_RelationsFile.xls" -f0.01 -V"P2Q_inOuts_infoFile.txt"
set sanxotP2Q_no_outs=sanxot.exe -aP2Q_noOuts -p"%DataDir%" -d"peptide.xls" -r"%DataDir%\P2QOuts_tagged.xls" -o"protein.xls" -g -f -V"P2Q_inOuts_infoFile.txt" --tags="!PTM & !out"

rem --- peptide to all, protein to category, protein to all, category to all ---
set sanxotP2A_in_outs=sanxot.exe -aP2A_inOuts -p"%DataDir%" -d"peptide.xls" -C -g
set sanxotQ2C_in_outs=sanxot.exe -aQ2C_inOuts -p"%DataDir%" -d"protein.xls" -r%Q2CRelationFile% -g
set sanxotsieveQC=sanxotsieve.exe -aQ2COuts -p"%DataDir%" -d"protein.xls" -r%Q2CRelationFile% -f0.01 -V"Q2C_inOuts_infoFile.txt"
set sanxotQ2C_no_outs=sanxot.exe -aQ2C_noOuts -p"%DataDir%" -d"protein.xls" -r"%DataDir%\Q2COuts_tagged.xls" -o"category.xls" -g -V"Q2C_inOuts_infoFile.txt" -f --tags="!out"
set sanxotQ2A=sanxot.exe -aQ2A -p"%DataDir%" -d"protein.xls" -C -V"Q2C_inOuts_infoFile.txt" -f -g
set sanxotC2A=sanxot.exe -aC2A -p"%DataDir%" -d"category.xls" -C -v0 -f -g
rem Stage 1: create the output folders and, in one window per tag, build the
rem scan tables and relation files.
for %%j in (iTRAQ_1) do (
for %%i in (113 114 115 116 117 118 119) do (
if not exist "%BaseFolder%\%%j\SanXoT_MOD\%%i\data" md "%BaseFolder%\%%j\SanXoT_MOD\%%i\data"
if not exist "%BaseFolder%\%%j\SanXoT_MOD\%%i\additional_data" md "%BaseFolder%\%%j\SanXoT_MOD\%%i\additional_data"
start "CALCULATION_%%j_%%i" cmd.exe /K "(%aljamiaSData_MOD% & %aljamiaSData_noMOD% & %aljamiaS2PRels_noMOD% & %aljamiaS2PRels_MOD% & %copyS2PRels% & %aljamiaP2QRels_noMOD% & %aljamiaP2QRels_MOD% & %copyP2QRels%)"
))

rem Wait until every tag has produced its last stage-1 file. The pause keeps
rem the polling loop from occupying a CPU core while the calculations run.
:wait_loop1
timeout /t 5 /nobreak >nul
for %%j in (iTRAQ_1) do (
for %%i in (113 114 115 116 117 118 119) do (
if not exist "%BaseFolder%\%%j\SanXoT_MOD\%%i\additional_data\P2Q_RelationsFile.xls" goto wait_loop1
))

rem Stage 2: calibration, scan-to-peptide integration and peptide-to-protein
rem outlier detection.
for %%j in (iTRAQ_1) do (
for %%i in (113 114 115 116 117 118 119) do (
start "CALCULATION_%%j_%%i" cmd.exe /K "(%klibrate_noMOD% & %klibrate_MOD% & %sanxotS2P_in_outs_NM% & %sanxotsieveSP_NM% & %sanxotS2P_no_outs_NM% & %sanxotS2P_in_outs_PTM% & %sanxotsieveSP_PTM% & %sanxotS2P_no_outs_PTM% & %copypeptide% & %sanxotP2Q_in_outs% & %sanxotsievePQ%)"
))

:wait_loop2
timeout /t 5 /nobreak >nul
for %%j in (iTRAQ_1) do (
for %%i in (113 114 115 116 117 118 119) do (
if not exist "%BaseFolder%\%%j\SanXoT_MOD\%%i\data\P2QOuts_tagged.xls" goto wait_loop2
))

rem Stage 3: peptide-to-protein integration without outliers.
for %%j in (iTRAQ_1) do (
for %%i in (113 114 115 116 117 118 119) do (
start "CALCULATION_%%j_%%i" cmd.exe /K %sanxotP2Q_no_outs%
))

:wait_loop3
timeout /t 5 /nobreak >nul
for %%j in (iTRAQ_1) do (
for %%i in (113 114 115 116 117 118 119) do (
if not exist "%BaseFolder%\%%j\SanXoT_MOD\%%i\data\protein.xls" goto wait_loop3
))

rem Stage 4: remaining integrations (peptide to all, protein to category,
rem protein to all, category to all).
for %%j in (iTRAQ_1) do (
for %%i in (113 114 115 116 117 118 119) do (
start "CALCULATION_%%j_%%i" cmd.exe /K "(%sanxotP2A_in_outs% & %sanxotQ2C_in_outs% & %sanxotsieveQC% & %sanxotQ2C_no_outs% & %sanxotQ2A% & %sanxotC2A%)"
))