## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set(collapse=TRUE, comment = "#>") ## ----setup1, echo=TRUE, warnings=FALSE---------------------------------------- library(wrMisc) library(wrProteo) library(wrTopDownFrag) ## ----AAfragSettings1, echo=TRUE----------------------------------------------- ## common settings str(AAfragSettings()) ## ----massDeFormula1, echo=TRUE------------------------------------------------ # Standard way to obtain the (monoisotopic) mass of water (H20) or a phosphorylation (PO3) # Note that the number a molecule appears must be written in front of the molecule (no number means one occurance) massDeFormula(c("2HO", "P3O")) # Undereith this runs (for H20): 2*.atomicMasses()["H","mono"] +.atomicMasses()["O","mono"] # H2O ## ----convAASeq2mass1, echo=TRUE----------------------------------------------- # Let's define two small amino-acid sequences protP <- c(pepK="KPEPTIDRPEP", pepC="CEPEPTRT", pepC2="PECEPTRT") # The sequence converted to mass convAASeq2mass(protP) ## ----nFragm1, out.width="150%", out.heigth="80%", echo=TRUE------------------- marks <- data.frame(name=c("Ubiquitin\n76aa","Glutamate dehydrogenase 1\n501aa"), length=c(76,501)) layout(matrix(1:2,ncol=2)) plotNTheor(x=20:750, log="", mark=marks) plotNTheor(x=20:750, log="xy", mark=marks) mtext("log/log scale", cex=0.8, line=0.1) ## ----fragmentSeq1, echo=TRUE-------------------------------------------------- protP <- c(pepK="KPEPTIDRPEP", pepC="CEPEPTRT") ## Basic output fragmentSeq(protP[1], minSize=3, internFragments=TRUE, pref="pepK") ## ----makeFragments1, echo=TRUE------------------------------------------------ ## Elaborate output protP2 <- cbind(na=names(protP), se=protP, ma=wrProteo::convAASeq2mass(protP, seqName=TRUE)) pepT1 <- makeFragments(protTab=protP2, minFragSize=3, maxFragSize=9, internFra=TRUE) ## ----makeFragments2, echo=TRUE------------------------------------------------ head(pepT1) dim(pepT1) ## The repartition between types of fragments : table(pepT1[,"ty"]) ## Types of ambiguities encountered table(pepT1[,"ambig"]) ## ----example1a, echo=TRUE----------------------------------------------------- Alb.seq <- "HLVDEPQNLIK" (Alb.mass <- wrProteo::convAASeq2mass("HLVDEPQNLIK", massTy="mono")) ## Now the mass for the MH+ ion (Alb.MHp <- wrProteo::convAASeq2mass("HLVDEPQNLIK", massTy="mono") + wrProteo::.atomicMasses()["H","mono"] - wrProteo::.atomicMasses()["e","mono"]) ## ----example1b, echo=TRUE----------------------------------------------------- ## delta prospector Alb.MHp - 1305.7161 ## ----example1c, echo=TRUE----------------------------------------------------- AlbFrag <- makeFragments(Alb.seq, minFragSize=2, maxFragSize=17, internFra=FALSE) head(AlbFrag) AlbFrag[which(AlbFrag[,"ty"]=="Nter"), c("seq","mass","beg","ty")] ## ----example1d, echo=TRUE----------------------------------------------------- Alb.b <- as.numeric(AlbFrag[which(AlbFrag[,"ty"]=="Nter"),c("mass")]) - 1*wrProteo::.atomicMasses()["O","mono"] - wrProteo::.atomicMasses()["H","mono"] - wrProteo::.atomicMasses()["e","mono"] Alb.y <- as.numeric(AlbFrag[which(AlbFrag[,"ty"]=="Cter"),c("mass")]) + wrProteo::.atomicMasses()["H","mono"] - 1*wrProteo::.atomicMasses()["e","mono"] Alb.a <- as.numeric(AlbFrag[which(AlbFrag[,"ty"]=="Nter"),c("mass")]) - wrProteo::.atomicMasses()["C","mono"] - 2*wrProteo::.atomicMasses()["O","mono"] - wrProteo::.atomicMasses()["H","mono"] - 1*wrProteo::.atomicMasses()["e","mono"] Alb.x <- as.numeric(AlbFrag[which(AlbFrag[,"ty"]=="Cter"),c("mass")]) + 1*wrProteo::.atomicMasses()["C","mono"] + 1*wrProteo::.atomicMasses()["O","mono"] - 1*wrProteo::.atomicMasses()["H","mono"] - 1*wrProteo::.atomicMasses()["e","mono"] AlbFrag.int <- makeFragments(Alb.seq, minFragSize=2, maxFragSize=17, internFra=TRUE) dim(AlbFrag.int) ## ----example1e, echo=TRUE----------------------------------------------------- ## Masses from Prospector for a,x,b- and y-ions Alb.prs.a <- c(223.1553, 322.2238, 437.2507, 566.2933, 663.3461, 791.4046, 905.4476, 1018.5316, 1131.6157) Alb.prs.x <- rev(c(1194.6365, 1081.5524, 982.4840, 867.4571, 738.4145, 641.3617, 513.3031, 399.2602, 286.1761)) #, 173.0921)) Alb.prs.b <- c(251.1503, 350.2187, 465.2456, 594.2882, 691.3410, 819.3995, 933.4425, 1046.5265, 1159.6106) Alb.prs.y <- rev(c(1168.6572, 1055.5732, 956.5047, 841.4778, 712.4352, 615.3824, 487.3239, 373.2809, 260.1969)) cbind(wr.b=Alb.b, d.b=Alb.b - Alb.prs.b, wr.y=Alb.y, d.y= Alb.y - Alb.prs.y) ## ----makeFragmentsB1, echo=TRUE----------------------------------------------- prot1 <- "RTVAAPSVFIFPPSDEQLKSG" ## ----makeFragmentsB2, echo=TRUE----------------------------------------------- (prot1.MHp <- wrProteo::convAASeq2mass(prot1, massTy="mono") + wrProteo::.atomicMasses()["H","mono"] -wrProteo::.atomicMasses()["e","mono"]) ## Besides, lateron we'll also need the mass of water (H2O.mass <- 2*wrProteo::.atomicMasses()["H","mono"] + wrProteo::.atomicMasses()["O","mono"]) ## ----makeFragmentsB3, echo=TRUE----------------------------------------------- ## b y ## --- 1 R 21 --- ## 258.1561 2 T 20 2090.0804 ## 357.2245 3 V 19 1989.0328 ## 428.2616 4 A 18 1889.9644 <== use b4 as example ## 499.2987 5 A 17 1818.9272 <= and corresp y17 ## ... . ... ... ## ----makeFragmentsB4, echo=TRUE----------------------------------------------- (prot1.b4.MHp <- wrProteo::convAASeq2mass("RTVA", massTy="mono") - wrProteo::.atomicMasses()["H","mono"] - wrProteo::.atomicMasses()["O","mono"] - wrProteo::.atomicMasses()["e","mono"]) ## ----makeFragmentsB5, echo=TRUE----------------------------------------------- (prot1.y18.MHp <- wrProteo::convAASeq2mass("APSVFIFPPSDEQLKSG", massTy="mono") + wrProteo::.atomicMasses()["H","mono"] - wrProteo::.atomicMasses()["e","mono"]) ## ----makeFragmentsB6, echo=TRUE----------------------------------------------- (prot1.MHp) - (prot1.b4.MHp + prot1.y18.MHp - wrProteo::.atomicMasses()["H","mono"] + wrProteo::.atomicMasses()["e","mono"]) ## ----makeFragmentsB7, echo=TRUE----------------------------------------------- prot1Frag <- makeFragments(prot1, minFragSize=4, maxFragSize=17, internFra=FALSE) prot1Frag[c(2,27),] # our prevous b- & y-ion (as neutral peptide mass) ## ----makeFragmentsB8, echo=TRUE----------------------------------------------- as.numeric(prot1Frag[2,"mass"]) - H2O.mass + wrProteo::.atomicMasses()["H","mono"] - wrProteo::.atomicMasses()["e","mono"] ## ----makeFragmentsB9, echo=TRUE----------------------------------------------- as.numeric(prot1Frag[27,"mass"]) + wrProteo::.atomicMasses()["H","mono"] - wrProteo::.atomicMasses()["e","mono"] ## ----makeFragmentsC1, echo=TRUE----------------------------------------------- ## Prospector internal fragments for "PSDEQL": ## Internal Sequence b a b-NH3 b-H2O ## ... ... ... ... ... ## IFPPSD 657.3243 629.3293 --- 639.3137 ## PSDEQL 670.3042 642.3093 653.2777 652.2937 ## ----makeFragmentsC2, echo=TRUE----------------------------------------------- prot1FragInt <- makeFragments(prot1, minFragSize=4, maxFragSize=17, internFra=TRUE) dim(prot1FragInt) ## ----makeFragmentsC3, echo=TRUE----------------------------------------------- PSDEQL.idx <- which(prot1FragInt[,2] =="PSDEQL") prot1FragInt[PSDEQL.idx -(1:0),] ## ----makeFragmentsC4, echo=TRUE----------------------------------------------- (PSDEQL.MH <- as.numeric(prot1FragInt[PSDEQL.idx,"mass"]) - H2O.mass + wrProteo::.atomicMasses()["H","mono"]) (PSDEQL.MHp <- as.numeric(prot1FragInt[PSDEQL.idx,"mass"]) - H2O.mass + wrProteo::.atomicMasses()["H","mono"] - wrProteo::.atomicMasses()["e","mono"]) c(Prospector=670.3042) - c(PSDEQL.MH, PSDEQL.MHp) ## ----toyData1, echo=TRUE------------------------------------------------------ # The toy peptide/protein sequnce protP <- c(pepK="KPEPTIDRPEP", pepC="CEPEPTRT") obsMass1 <- cbind(a=c(424.2554,525.3031,638.3872,753.4141,909.5152,1006.5680,1135.6106), b=c(452.2504,553.2980,666.3821,781.4090,937.5102,1034.5629,1163.6055), x=c(524.2463,639.2733,752.3573,853.4050,950.4578,1079.5004,1176.5531), y=c(498.2671,613.2940,726.3781,827.4258,924.4785,1053.5211,1150.5739), bdH=c(434.2398,535.2875,648.3715,763.3985,919.4996,1016.5524,1145.5949), ydH=c(480.2565,1132.5633,595.2835,708.3675,809.4152,906.4680,1035.5106), bi=c(498.2307,583.3198,583.3198,611.3148,680.3726,712.3624,712.3624), bidH=c(662.3620,694.3519,694.3519,791.4046,791.4046,791.4046,888.4574), bidN=c(663.3461,695.3359,695.3359,792.3886,792.3886,792.3886,889.4414), ai=c(652.3777,684.3675,684.3675,781.4203,781.4203,781.4203,878.4730) ) rownames(obsMass1) <- c("P","T","I","D","R","P","E") # only for N-term (a & b) ## This example contains several iso-mass cases length(obsMass1) length(unique(as.numeric(obsMass1))) ## ----toyData2, echo=TRUE------------------------------------------------------ ## have same mass ## 480.2201 DRPE-H2O & y4-H2O ## 583.3198 PTIDR & TIDRP ; 565.3093 PTIDR-H2O & TIDRP-H2O ## 809.4152 y7-H2O & EPTIDRP ## 684.3675 TIDRPE-CO & EPTIDR-CO ; 694.3519 EPTIDR-H2O & TIDRPE-H2O ## 712.3624 TIDRPE & EPTIDR ## ----toyData3, echo=TRUE------------------------------------------------------ # Now we'll add some random noise (to mimick experimental data) set.seed(2020) obsMass2 <- as.numeric(obsMass1) obsMass2 <- obsMass2 + runif(length(obsMass2), min=-2e-4, max=2e-4) ## ----identifFixedModif2, echo=TRUE-------------------------------------------- identP1 <- identifFixedModif(prot=protP[1], expMass=obsMass2, minFragSize=3, maxFragSize=7, modTy=list(basMod=c("b","y"))) # should find 10term +10inter ## This function returns a list str(identP1) ## $masMatch identifies each ## ----identifFixedModif2b, echo=TRUE------------------------------------------- identP1$preMa[ which(identP1$preMa[,"no"] %in% (names(identP1$massMatch))),c("seq","orig","ty","beg","end","precAA","finMass","mod")] ## ----sessionInfo, echo=FALSE-------------------------------------------------- sessionInfo()