Skip to content

Commit 92d5724

Browse files
committed
add utils functions and new as(mzRident, "data.frame")
1 parent 0fd2e98 commit 92d5724

11 files changed

+428
-193
lines changed

Diff for: DESCRIPTION

+2-1
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ Depends:
4848
ProtGenerics (>= 1.5.1)
4949
Imports:
5050
plyr,
51+
dplyr,
5152
IRanges,
5253
preprocessCore,
5354
vsn,
@@ -57,7 +58,7 @@ Imports:
5758
impute,
5859
pcaMethods,
5960
MALDIquant (>= 1.16),
60-
mzID (>= 1.5.2),
61+
mzID (>= 1.5.2),
6162
digest,
6263
lattice,
6364
ggplot2,

Diff for: NAMESPACE

+8-5
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,12 @@ importFrom(S4Vectors, isEmpty, metadata)
3838
## importFrom(rols, olsQuery, term)
3939
## importClassesFrom(rols, CVParam)
4040
importFrom(pcaMethods, pca, completeObs)
41-
importFrom(plyr, ddply, llply, adply, summarise, summarize)
41+
importFrom(plyr, ddply, llply)
42+
importFrom(dplyr, left_join)
4243
importFrom(stats4, plot)
4344
importFrom(preprocessCore, normalize.quantiles)
4445
importFrom(vsn, vsn2, meanSdPlot)
45-
importFrom(IRanges, IRanges)
46+
importFrom(IRanges, IRanges, reduce)
4647
importClassesFrom(IRanges, IRanges)
4748
importFrom(affy, MAplot, ma.plot, mva.pairs)
4849
importFrom(mzID, mzID, flatten)
@@ -67,9 +68,9 @@ export(MSnSet,
6768
## extractSpectra, defunct in v 1.5.3
6869
extractPrecSpectra,
6970
as.matrix.FoICollection,
71+
as.data.frame.mzRident,
7072
as.data.frame.Spectrum,
7173
as.data.frame.MSnSet, ms2df,
72-
as.data.frame.mzRident,
7374
as.ExpressionSet.MSnSet,
7475
as.MSnSet.ExpressionSet,
7576
as.MIAME.MIAPE,
@@ -130,7 +131,8 @@ export(MSnSet,
130131
productMz,
131132
aggregationFun,
132133
Chromatogram,
133-
Chromatograms
134+
Chromatograms,
135+
factorsAsStrings
134136
)
135137

136138
exportClasses(pSet,
@@ -277,7 +279,8 @@ exportMethods(updateObject,
277279
pData, "pData<-",
278280
"$",
279281
"$<-",
280-
chromatogram)
282+
chromatogram,
283+
reduce)
281284

282285
## methods NOT exported
283286
## curveStats

Diff for: NEWS.md

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
# MSnbase 2.3
22

33
## Changes in version 2.3.9
4-
- Nothing yet
4+
- Using new mzR::openIDfile backend to add identification data to raw
5+
and quantitative data (see issue #232) <2017-07-28 Fri>
6+
- New utils functions: factorsAsStrings, makeCamelCase and
7+
reduce,data.frame <2017-07-29 Sat>
58

69
## Changes in version 2.3.8
710
- New infrastructure for chromatogram data <2017-06-24 Sat>

Diff for: R/functions-mzR.R

+37-22
Original file line numberDiff line numberDiff line change
@@ -111,33 +111,48 @@ plotMzDelta_list <- function(object, ## peakLists
111111
invisible(p)
112112
}
113113

114+
##' A function to convert the identification data contained in an
115+
##' \code{mzRident} object to a \code{data.frame}. Each row represents
116+
##' a scan, which can however be repeated several times if the PSM
117+
##' matches multiple proteins and/or contains two or more
118+
##' modifications. To reduce the \code{data.frame} so that rows/scans
119+
##' are unique, use \code{\link{reduce}}.
120+
##'
121+
##' @title Coerce identification data to a \code{data.frame}
122+
##' @param from An object of class \code{mzRident} defined in the
123+
##' \code{mzR} package.
124+
##' @return A \code{data.frame}
125+
##' @author Laurent Gatto
126+
##' @name as
127+
##' @rdname mzRident2dfr
128+
##' @examples
129+
##' ## find path to a mzIdentML file
130+
##' identFile <- dir(system.file(package = "MSnbase", dir = "extdata"),
131+
##' full.names = TRUE, pattern = "dummyiTRAQ.mzid")
132+
##' library("mzR")
133+
##' x <- openIDfile(identFile)
134+
##' x
135+
##' as(x, "data.frame")
114136
setAs("mzRident", "data.frame",
115137
function(from) {
116-
iddf <- psms(from)
138+
## peptide spectrum matching
139+
iddf <- factorsAsStrings(psms(from))
140+
## add file raw and mzid provenances
117141
iddf$spectrumFile <- basename(sourceInfo(from))
118142
iddf$idFile <- basename(fileName(from))
119-
iddf <- utils.leftJoin(iddf, score(from),
120-
by.x = "spectrumID",
121-
by.y = "spectrumID")
122-
mods <- modifications(from)
123-
names(mods)[-1] <- paste0("mod.", names(mods)[-1])
124-
names(mods)[-1] <- gsub('\\.(\\w?)', '\\U\\1', names(mods)[-1], perl = TRUE)
125-
iddf <- utils.leftJoin(iddf, mods,
126-
by.x = "spectrumID",
127-
by.y = "spectrumID")
128-
subs <- substitutions(from)
129-
names(subs)[-1] <- paste0("sub.", names(subs)[-1])
130-
names(subs)[-1] <- gsub('\\.(\\w?)', '\\U\\1', names(subs)[-1], perl = TRUE)
131-
iddf <- utils.leftJoin(iddf, subs,
132-
by.x = "spectrumID",
133-
by.y = "spectrumID")
134-
iddf <- lapply(iddf,
135-
function(x) {
136-
if (is.factor(x)) as.character(x)
137-
else x
138-
})
139-
data.frame(iddf, stringsAsFactors = FALSE)
143+
## add scores
144+
scores <- factorsAsStrings(score(from))
145+
iddf <- suppressMessages(left_join(iddf, scores))
146+
## add modification
147+
mods <- factorsAsStrings(modifications(from))
148+
names(mods)[-1] <- makeCamelCase(names(mods), prefix = "mod")[-1]
149+
iddf <- suppressMessages(left_join(iddf, mods))
150+
## add substitutions
151+
subs <- factorsAsStrings(substitutions(from))
152+
names(subs)[-1] <- makeCamelCase(names(subs), prefix = "sub")[-1]
153+
suppressMessages(left_join(iddf, subs))
140154
})
141155

142156
as.data.frame.mzRident <-
143157
function(x, row.names = NULL, optional = FALSE, ...) as(x, "data.frame")
158+

Diff for: R/methods-MSnSet.R

+3
Original file line numberDiff line numberDiff line change
@@ -656,10 +656,12 @@ setMethod("addIdentificationData", c("MSnSet", "character"),
656656
verbose = isMSnbaseVerbose()) {
657657
if (length(id) == 1 && file.exists(id)) {
658658
id <- mzR::openIDfile(id)
659+
id <- reduce(id, key = "spectrumID")
659660
} else {
660661
if (!all(flex <- file.exists(id)))
661662
stop(paste(id[!flex], collapse = ", "), " not found.")
662663
id <- lapply(id, function(x) as(openIDfile(x), "data.frame"))
664+
id <- lapply(id, reduce, key = "spectrumID")
663665
id <- do.call(rbind, id)
664666
}
665667
addIdentificationData(object, id = id,
@@ -676,6 +678,7 @@ setMethod("addIdentificationData", c("MSnSet", "mzRident"),
676678
pepseq = "sequence",
677679
...) {
678680
iddf <- as(id, "data.frame")
681+
iddf <- reduce(iddf)
679682
addIdentificationData(object, iddf,
680683
fcol = fcol, icol = icol,
681684
acc, desc, pepseq)

0 commit comments

Comments
 (0)