libpappsomspp
Library for mass spectrometry
msrunreader.cpp
Go to the documentation of this file.
1 /**
2  * \file pappsomspp/msrun/msrunreader.cpp
3  * \date 29/05/2018
4  * \author Olivier Langella
5  * \brief base interface to read MSrun files
6  */
7 
8 /*******************************************************************************
9  * Copyright (c) 2018 Olivier Langella <Olivier.Langella@u-psud.fr>.
10  *
11  * This file is part of the PAPPSOms++ library.
12  *
13  * PAPPSOms++ is free software: you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License as published by
15  * the Free Software Foundation, either version 3 of the License, or
16  * (at your option) any later version.
17  *
18  * PAPPSOms++ is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25  *
26  ******************************************************************************/
27 
28 #include <QDebug>
29 
30 #include "msrunreader.h"
31 #include "../../pappsomspp/exception/exceptionnotfound.h"
32 
33 
35  qRegisterMetaType<pappso::MsRunReaderSPtr>("pappso::MsRunReaderSPtr");
36 
37 
38 namespace pappso
39 {
40 
41 
42 bool
44 {
45  return false;
46 }
47 void
49 {
50 }
51 void
53  [[maybe_unused]] std::size_t size)
54 {
55 }
56 void
58 {
59  m_isReadAhead = is_read_ahead;
60 }
61 
62 bool
64 {
65  return m_isReadAhead;
66 }
67 
// Tells whether the peak list is wanted for spectra of the given MS level.
//
// When needPeakList() reports false the answer is always false. Otherwise the
// per-level flag stored in m_needPeakListByMsLevel is returned. The container
// is indexed directly with ms_level (there is no "- 1" offset here).
68 bool
70  unsigned int ms_level) const
71 {
72  if(needPeakList() == true)
73  {
74  if(ms_level < m_needPeakListByMsLevel.size())
75  {
76  return m_needPeakListByMsLevel[ms_level];
77  }
78  else
// No flag is stored for this level: fall back to wanting the peak list.
79  return true;
80  }
81  else
82  {
83  return false;
84  }
85 }
// Stores want_peak_list as the flag for the given MS level.
//
// The flag is written only when ms_level is a valid index into
// m_needPeakListByMsLevel; the container is never grown here and nothing is
// reported to the caller when the level is out of range.
// NOTE(review): for an out-of-range level the request is silently dropped,
// while the code in this file that reads m_needPeakListByMsLevel answers
// "true" for such levels whenever needPeakList() is true. Disabling the peak
// list for a level beyond the container size therefore has no effect -
// confirm this is intended.
86 void
88  unsigned int ms_level, bool want_peak_list)
89 {
90  if(ms_level < m_needPeakListByMsLevel.size())
91  {
92  m_needPeakListByMsLevel[ms_level] = want_peak_list;
93  }
94 }
95 
96 bool
98 {
99  return false;
100 }
101 
102 
103 void
105 {
106  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
107  //<< "The data loading process ended.";
108 }
109 
110 
// Counts one spectrum under its MS level.
//
// qspectrum: the spectrum being visited; only its MS level is read.
111 void
113  const QualifiedMassSpectrum &qspectrum)
114 {
115  // m_countMsLevelSpectrum[0] contains the number of spectra at MS
116  // m_countMsLevelSpectrum[1] contains the number of spectra at MS^2
117  // m_countMsLevelSpectrum[2] contains the number of spectra at MS^3
118  // ... i.e. the counter for MS level N lives at index N - 1.
119 
120  unsigned int ms_level = qspectrum.getMsLevel();
// Level 0 has no slot in the N - 1 layout, so such spectra are not counted.
121  if(ms_level == 0)
122  return;
// Grow the counter vector on demand so that index ms_level - 1 exists.
123  if(ms_level > m_countMsLevelSpectrum.size())
124  {
125  m_countMsLevelSpectrum.resize(ms_level);
126  }
127  m_countMsLevelSpectrum[ms_level - 1]++;
128 }
129 
130 
131 unsigned long
132 MsRunSimpleStatistics::getMsLevelCount(unsigned int ms_level) const
133 {
134  if(ms_level == 0)
135  return 0;
136  if(ms_level > m_countMsLevelSpectrum.size())
137  return 0;
138  return (m_countMsLevelSpectrum[ms_level - 1]);
139 }
140 
141 
// Returns the sum of all the per-MS-level counters held in
// m_countMsLevelSpectrum (0 when the container is empty).
142 unsigned long
144 {
145  unsigned long total = 0;
146  for(unsigned long count : m_countMsLevelSpectrum)
147  {
148  total += count;
149  }
150  return total;
151 }
152 
153 
155 {
156  // qDebug();
157 }
158 
159 
161 {
162  // qDebug();
163 }
164 
165 
166 bool
168 {
169  return false;
170 }
171 
// Records the (scan number -> spectrum index) association of one spectrum in
// m_mmap_scan2index.
//
// qspectrum: the spectrum being visited; its native id and spectrum index are
// read through getMassSpectrumId().
172 void
174  const QualifiedMassSpectrum &qspectrum)
175 {
176  qDebug() << " " << qspectrum.getMassSpectrumId().getNativeId();
177 
// The native id is cut on every '='; the scan number is taken from the text
// that follows the last '='.
178  QStringList native_id_list =
179  qspectrum.getMassSpectrumId().getNativeId().split("=");
180  if(native_id_list.size() < 2)
181  {
// No '=' in the native id: nothing is recorded for this spectrum.
182  return;
183  }
184  else
185  {
// NOTE(review): toULong() is called without its "ok" out-parameter, so a
// failed conversion is not detected. Qt documents a return value of 0 in that
// case, which would be stored here as scan number 0 - confirm this is
// acceptable.
186  std::size_t scan_number = native_id_list.back().toULong();
187  m_mmap_scan2index.insert(std::pair<std::size_t, std::size_t>(
188  scan_number, qspectrum.getMassSpectrumId().getSpectrumIndex()));
189 
190  qDebug() << "scan number " << scan_number << "=>"
191  << qspectrum.getMassSpectrumId().getSpectrumIndex();
192  }
193 }
194 
// Looks up the spectrum index recorded for a scan number.
//
// scan_number: key searched in m_mmap_scan2index.
// Returns the spectrum index stored with that key.
// Throws ExceptionNotFound when the key is absent, and PappsoException when
// the element following the match carries the same key (ambiguous scan
// number).
195 std::size_t
197  std::size_t scan_number) const
198 {
199 
200  qDebug() << m_mmap_scan2index.size();
201 
202  auto it = m_mmap_scan2index.find(scan_number);
203 
204  if(it == m_mmap_scan2index.end())
205  {
206  throw ExceptionNotFound(
207  QObject::tr("scan number %1 not found").arg(scan_number));
208  }
209 
210  std::size_t index = it->second;
211 
// Duplicate detection: equal keys are adjacent in a multimap, so the next
// element is inspected.
// NOTE(review): std::multimap::find is allowed to return any of the elements
// sharing the key, not necessarily the first one; if it returned the last,
// this check would miss the duplicate. count() or equal_range() would not
// depend on that - confirm whether this matters for the standard libraries in
// use.
212  it++;
213  if((it != m_mmap_scan2index.end()) && (it->first == scan_number))
214  {
215  throw PappsoException(
216  QObject::tr("scan number %1 found multiple times").arg(scan_number));
217  }
218  return index;
219 }
220 
221 
223 {
224  // qDebug();
225 }
226 
227 
229 {
230  // qDebug();
231 }
232 
233 
234 bool
236 {
237  return false;
238 }
239 
240 
241 void
243  const QualifiedMassSpectrum &qspectrum)
244 {
245  qDebug() << " " << qspectrum.getMassSpectrumId().getNativeId();
246 
247  m_retention_time_list.push_back(qspectrum.getRtInSeconds());
248 }
249 
250 const std::vector<double> &
252 {
253  return m_retention_time_list;
254 }
255 
256 
257 MsRunReader::MsRunReader(MsRunIdCstSPtr &ms_run_id) : mcsp_msRunId(ms_run_id)
258 {
259 }
260 
262  : mcsp_msRunId(other.mcsp_msRunId)
263 {
264  mpa_multiMapScanNumber = nullptr;
265 }
266 
267 
268 const MsRunIdCstSPtr &
270 {
271  return mcsp_msRunId;
272 }
273 
274 
276 {
277  if(mpa_multiMapScanNumber == nullptr)
278  delete mpa_multiMapScanNumber;
279 }
280 
281 
282 std::size_t
284 {
285  qDebug() << " " << mpa_multiMapScanNumber;
286 
287  if(mpa_multiMapScanNumber == nullptr)
288  {
291  }
292  try
293  {
295  scan_number);
296  }
297 
298  catch(ExceptionNotFound &error)
299  {
300  throw ExceptionNotFound(QObject::tr("error reading file %1 : %2")
301  .arg(mcsp_msRunId.get()->getFileName())
302  .arg(error.qwhat()));
303  }
304  catch(PappsoException &error)
305  {
306  throw PappsoException(QObject::tr("error reading file %1 : %2")
307  .arg(mcsp_msRunId.get()->getFileName())
308  .arg(error.qwhat()));
309  }
310 }
311 
312 
313 bool
315 {
316  return false;
317 }
318 
// Collects the retention times of the MS level 1 spectra of this run.
//
// A MsRunReaderRetentionTimeLine handler is fed through
// readSpectrumCollectionByMsLevel() and the vector it gathered is returned by
// value (a copy of the handler's own vector, since the handler is a local).
// ExceptionNotFound and PappsoException raised while reading are rethrown as
// a new exception of the same type whose message is prefixed with the run's
// file name.
319 std::vector<double>
321 {
322  qDebug();
323 
324  try
325  {
326 
327  MsRunReaderRetentionTimeLine reader_timeline;
328 
// The second argument restricts the visit to MS level 1.
329  readSpectrumCollectionByMsLevel(reader_timeline, 1);
330 
331  return reader_timeline.getRetentionTimeLine();
332  }
333 
// Handled before PappsoException so that the ExceptionNotFound type is kept
// (presumably ExceptionNotFound derives from PappsoException - confirm).
334  catch(ExceptionNotFound &error)
335  {
336  throw ExceptionNotFound(QObject::tr("error reading file %1 : %2")
337  .arg(mcsp_msRunId.get()->getFileName())
338  .arg(error.qwhat()));
339  }
340  catch(PappsoException &error)
341  {
342  throw PappsoException(QObject::tr("error reading file %1 : %2")
343  .arg(mcsp_msRunId.get()->getFileName())
344  .arg(error.qwhat()));
345  }
346 }
347 
348 
// Builds the TIC chromatogram of this run.
//
// A MsRunReaderTicChromatogram handler is fed with the whole spectrum
// collection through readSpectrumCollection() and the Trace it produced is
// returned. ExceptionNotFound and PappsoException raised while reading are
// rethrown as a new exception of the same type whose message is prefixed with
// the run's file name.
349 Trace
351 {
352  qDebug();
353 
354  try
355  {
356  MsRunReaderTicChromatogram ms_run_reader;
357 
358  readSpectrumCollection(ms_run_reader);
359 
360  return ms_run_reader.getTicChromatogram();
361  }
362 
// Handled before PappsoException so that the ExceptionNotFound type is kept
// (presumably ExceptionNotFound derives from PappsoException - confirm).
363  catch(ExceptionNotFound &error)
364  {
365  throw ExceptionNotFound(QObject::tr("error reading file %1 : %2")
366  .arg(mcsp_msRunId.get()->getFileName())
367  .arg(error.qwhat()));
368  }
369  catch(PappsoException &error)
370  {
371  throw PappsoException(QObject::tr("error reading file %1 : %2")
372  .arg(mcsp_msRunId.get()->getFileName())
373  .arg(error.qwhat()));
374  }
375 }
376 
377 
379 {
380 }
381 
382 
384 {
385 }
386 
387 
388 bool
390 {
391  return true;
392 }
393 
394 
// Adds the contribution of one spectrum to the TIC chromatogram stored in
// m_ticChromMapTrace.
//
// qualified_mass_spectrum: the spectrum being visited; its MS level, its
// retention time and the sum of its intensities are read.
395 void
397  const QualifiedMassSpectrum &qualified_mass_spectrum)
398 {
399  // In this specialized reader we want to compute the total ion current
400  // chromatogram, which plots the sum of all the ion intensities in the
401  // spectra as a function of the retention time.
402 
403  uint spectrum_ms_level = qualified_mass_spectrum.getMsLevel();
404 
// Only MS level 1 spectra contribute.
405  if(spectrum_ms_level != 1)
406  return;
407 
408  double sumY = qualified_mass_spectrum.getMassSpectrumSPtr()->sumY();
409 
// A spectrum whose intensity sum is exactly zero adds no point.
410  if(!sumY)
411  return;
412 
// The x axis of this chromatogram uses the retention time in minutes.
413  double rt = qualified_mass_spectrum.getRtInMinutes();
414 
415  using Pair = std::pair<double, double>;
416  using Map = std::map<double, double>;
417  using Iterator = Map::iterator;
418 
// insert() leaves an existing entry untouched and reports that through
// res.second == false.
419  std::pair<Iterator, bool> res = m_ticChromMapTrace.insert(Pair(rt, sumY));
420 
421  if(!res.second)
422  {
423  // One other same rt value was seen already (like in ion mobility mass
424  // spectrometry, for example). Only increment the y value.
425 
426  res.first->second += sumY;
427  }
428 }
429 
430 
431 Trace
433 {
434  return m_ticChromMapTrace.toTrace();
435 }
436 
437 
438 } // namespace pappso
Trace toTrace() const
Definition: maptrace.cpp:218
std::size_t getSpectrumIndex() const
const QString & getNativeId() const
collect retention times along MS run
Definition: msrunreader.h:150
const std::vector< double > & getRetentionTimeLine() const
virtual void setQualifiedMassSpectrum(const QualifiedMassSpectrum &spectrum) override
virtual bool needPeakList() const override
tells if we need the peak list (if we want the binary data) for each spectrum
std::vector< double > m_retention_time_list
Definition: msrunreader.h:152
provides a multimap to find quickly spectrum index from scan number
Definition: msrunreader.h:132
virtual bool needPeakList() const override
tells if we need the peak list (if we want the binary data) for each spectrum
virtual void setQualifiedMassSpectrum(const QualifiedMassSpectrum &spectrum) override
std::size_t getSpectrumIndexFromScanNumber(std::size_t scan_number) const
std::multimap< std::size_t, std::size_t > m_mmap_scan2index
Definition: msrunreader.h:134
calculate a TIC chromatogram
Definition: msrunreader.h:168
virtual void setQualifiedMassSpectrum(const QualifiedMassSpectrum &qualified_mass_spectrum) override
virtual bool needPeakList() const override
tells if we need the peak list (if we want the binary data) for each spectrum
base class to read MSrun; the only way to build a MsRunReader object is to use the MsRunReaderFactory
Definition: msrunreader.h:191
MsRunIdCstSPtr mcsp_msRunId
Definition: msrunreader.h:282
MsRunReaderScanNumberMultiMap * mpa_multiMapScanNumber
Definition: msrunreader.h:283
virtual bool hasScanNumbers() const
tells if spectra can be accessed using scan numbers; by default, it returns false. Only overridden func...
virtual std::vector< double > getRetentionTimeLine()
retention timeline get retention times along the MSrun in seconds
virtual std::size_t scanNumber2SpectrumIndex(std::size_t scan_number)
if possible, converts a scan number into a spectrum index. This is a convenient function to help trans...
virtual void readSpectrumCollection(SpectrumCollectionHandlerInterface &handler)=0
function to visit an MsRunReader and get each Spectrum in a spectrum collection handler
virtual Trace getTicChromatogram()
MsRunReader(MsRunIdCstSPtr &ms_run_id)
virtual void readSpectrumCollectionByMsLevel(SpectrumCollectionHandlerInterface &handler, unsigned int ms_level)=0
function to visit an MsRunReader and get each Spectrum in a spectrum collection handler by Ms Levels
const MsRunIdCstSPtr & getMsRunId() const
virtual void setQualifiedMassSpectrum(const QualifiedMassSpectrum &spectrum) override
unsigned long getTotalCount() const
virtual void loadingEnded() override
std::vector< unsigned long > m_countMsLevelSpectrum
Definition: msrunreader.h:115
virtual bool needPeakList() const override
tells if we need the peak list (if we want the binary data) for each spectrum
Definition: msrunreader.cpp:97
unsigned long getMsLevelCount(unsigned int ms_level) const
virtual const QString & qwhat() const
Class representing a fully specified mass spectrum.
uint getMsLevel() const
Get the mass spectrum level.
pappso_double getRtInMinutes() const
Get the retention time in minutes.
const MassSpectrumId & getMassSpectrumId() const
Get the MassSpectrumId.
MassSpectrumSPtr getMassSpectrumSPtr() const
Get the MassSpectrumSPtr.
pappso_double getRtInSeconds() const
Get the retention time in seconds.
virtual bool isReadAhead() const
tells if we want to read ahead spectrum
Definition: msrunreader.cpp:63
virtual bool needPeakList() const =0
tells if we need the peak list (if we want the binary data) for each spectrum
virtual bool needMsLevelPeakList(unsigned int ms_level) const final
tells if we need the peak list (if we want the binary data) for each spectrum, given an MS level
Definition: msrunreader.cpp:69
virtual void setReadAhead(bool is_read_ahead) final
use threads to read a spectrum by batch of batch_size
Definition: msrunreader.cpp:57
virtual void setNeedMsLevelPeakList(unsigned int ms_level, bool want_peak_list) final
tells if we need the peak list given
Definition: msrunreader.cpp:87
virtual void spectrumListHasSize(std::size_t size)
Definition: msrunreader.cpp:52
A simple container of DataPoint instances.
Definition: trace.h:148
int msRunReaderSPtrMetaTypeId
Definition: msrunreader.cpp:34
base interface to read MSrun files
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition: msrunid.h:44
@ rt
Retention time.
unsigned int uint
Definition: types.h:55