libpappsomspp
Library for mass spectrometry
pappso::Enzyme Class Reference

#include <enzyme.h>

Public Member Functions

 Enzyme ()
 build the default enzyme (trypsin) with recognition_site = "([KR])([^P])" More...
 
 Enzyme (const QString &recognition_site)
 build any enzyme given a recognition_site More...
 
 ~Enzyme ()
 
void eat (std::int8_t sequence_database_id, const ProteinSp &protein_sp, bool is_decoy, EnzymeProductInterface &enzyme_product) const
 digest a protein into enzyme products More...
 
void setMiscleavage (unsigned int miscleavage)
 sets the maximum number of missed cleavage allowed in the digestion More...
 
unsigned int getMiscleavage () const
 get the maximum number of missed cleavage allowed in the digestion More...
 
void setTakeOnlyFirstWildcard (bool take_only_first_wildcard)
 take only the first possibility when the sequence contains X, B or Z wildcards More...
 
void setMaxPeptideVariantListSize (std::size_t max_peptide_variant_list_size)
 if there are wildcards in the protein sequence : restrict the number of possible peptide sequences More...
 
const QRegExp & getQRegExpRecognitionSite () const
 

Private Member Functions

void sanityCheck (EnzymeProductInterface &enzyme_product, std::int8_t sequence_database_id, const ProteinSp &protein_sp, bool is_decoy, const PeptideStr &peptide, unsigned int start, bool is_nter, unsigned int missed_cleavage_number, bool semi_enzyme) const
 
void replaceWildcards (std::vector< std::string > *p_peptide_variant_list) const
 

Private Attributes

QRegExp m_recognitionSite
 example with a kinase == [K,R] More...
 
unsigned int m_miscleavage = 0
 
bool m_takeOnlyFirstWildcard = false
 
std::size_t m_maxPeptideVariantListSize = 100
 
std::vector< char > m_wildCardX
 
std::vector< char > m_wildCardB
 
std::vector< char > m_wildCardZ
 

Detailed Description

Definition at line 52 of file enzyme.h.

Constructor & Destructor Documentation

◆ Enzyme() [1/2]

pappso::Enzyme::Enzyme ( )

build the default enzyme (trypsin) with recognition_site = "([KR])([^P])"

Definition at line 53 of file enzyme.cpp.

55  {'A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I',
56  'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V'};
57  m_wildCardX.assign(std::begin(vv1), std::end(vv1));
58 
59  char vv2[] = {'N', 'D'};
60  m_wildCardB.assign(std::begin(vv2), std::end(vv2));
61 
62  char vv3[] = {'Q', 'E'};
63  m_wildCardZ.assign(std::begin(vv3), std::end(vv3));
64 }
65 
67 {
68 }

◆ Enzyme() [2/2]

pappso::Enzyme::Enzyme ( const QString &  recognition_site)

build any enzyme given a recognition_site

Parameters
recognition_site: a regular expression that must identify 2 motifs: one on the N-terminal side and one on the C-terminal side

Definition at line 70 of file enzyme.cpp.

72 {
73  m_miscleavage = miscleavage;
74 }
75 unsigned int
77 {
78  return m_miscleavage;
79 }
80 void
81 Enzyme::setMaxPeptideVariantListSize(std::size_t max_peptide_variant_list_size)
82 {
83  m_maxPeptideVariantListSize = max_peptide_variant_list_size;
84 }
85 

References m_miscleavage.

◆ ~Enzyme()

pappso::Enzyme::~Enzyme ( )

Definition at line 87 of file enzyme.cpp.

91 {

Member Function Documentation

◆ eat()

void pappso::Enzyme::eat ( std::int8_t  sequence_database_id,
const ProteinSp protein_sp,
bool  is_decoy,
EnzymeProductInterface enzyme_product 
) const

digest a protein into enzyme products

Parameters
sequence_database_id: integer that references the sequence database (file, stream, url...)
protein_sp: the original protein to be digested
is_decoy: tells whether the current protein is a decoy (true) or a normal (false) protein
enzyme_product: the object that will receive the digestion products

Definition at line 108 of file enzyme.cpp.

113  {
114  peptide_size = pos + m_recognitionSite.cap(1).length() - peptide_start;
115  // qDebug() << "pos=" << pos << " peptide_start=" << peptide_start << "
116  // peptide_size=" << peptide_size << " " <<
117  // sequence.mid(peptide_start,peptide_size);
118  if(peptide_size > 0)
119  {
120  peptide_list.append(sequence.mid(peptide_start, peptide_size));
121  }
122  peptide_start += peptide_size;
123  pos = peptide_start; // all peptides MUST be consecutive
124  }
125  peptide_size = sequence.size() - peptide_start;
126  if(peptide_size > 0)
127  {
128  peptide_list.append(sequence.mid(peptide_start, peptide_size));
129  }
130 
131  unsigned int start = 1;
132  bool is_nter = true;
133  foreach(const QString &peptide, peptide_list)
134  {
135  // enzyme_product.setPeptide(sequence_database_id, protein_sp,is_decoy,
136  // peptide, start,is_nter,0, false);
137  sanityCheck(enzyme_product,
138  sequence_database_id,
139  protein_sp,
140  is_decoy,
141  peptide,
142  start,
143  is_nter,
144  0,
145  false);
146  is_nter = false;
147  start += peptide.size();
148  }
149 
150  unsigned int miscleavage_i = 0;
151  while(miscleavage_i < m_miscleavage)
152  {
153  miscleavage_i++;
154  qDebug() << "miscleavage_i=" << miscleavage_i;
155  int chunk_number = miscleavage_i + 1;
156  unsigned int start = 1;
157  bool is_nter = true;
158 
159  for(auto i = 0; i < peptide_list.size(); ++i)
160  {
161  qDebug() << "start=" << start;
162  QStringList peptide_mis_list;
163  for(auto j = 0; (j < chunk_number) && ((i + j) < peptide_list.size());
164  j++)
165  {
166  peptide_mis_list << peptide_list.at(i + j);
167  }
168  if(peptide_mis_list.size() == chunk_number)
169  {
170  // enzyme_product.setPeptide(sequence_database_id,
171  // protein_sp,is_decoy, peptide_mis_list.join(""), start,is_nter,
172  // miscleavage_i, false);
173  sanityCheck(enzyme_product,
174  sequence_database_id,
175  protein_sp,
176  is_decoy,
177  peptide_mis_list.join(""),
178  start,
179  is_nter,
180  miscleavage_i,
181  false);
182  }
183  is_nter = false;
184  start += peptide_list.at(i).size();
185  }
186  }
187 }
188 
189 void
190 Enzyme::replaceWildcards(std::vector<std::string> *p_peptide_variant_list) const
191 {
192  std::string new_peptide = p_peptide_variant_list->at(0);
193  qDebug() << "Enzyme::replaceWildcards begin " << new_peptide.c_str();
194  std::vector<std::string> old_peptide_variant_list;
195  old_peptide_variant_list.assign(p_peptide_variant_list->begin(),
196  p_peptide_variant_list->end());
197 
198 
199  for(char wildcard : {'X', 'B', 'Z'})
200  {
201 
202  std::size_t position = new_peptide.find(wildcard);
203  if(position == std::string::npos)
204  {
205  continue;
206  }
207  else
208  {

References m_recognitionSite.

◆ getMiscleavage()

unsigned int pappso::Enzyme::getMiscleavage ( ) const

get the maximum number of missed cleavage allowed in the digestion

Returns
miscleavage: maximum number of missed cleavages to allow (default is 0)

Definition at line 97 of file enzyme.cpp.

98  :
99  seq_stack.pop(0)
100  s2 = ""

◆ getQRegExpRecognitionSite()

const QRegExp & pappso::Enzyme::getQRegExpRecognitionSite ( ) const

Definition at line 369 of file enzyme.cpp.

◆ replaceWildcards()

void pappso::Enzyme::replaceWildcards ( std::vector< std::string > *  p_peptide_variant_list) const
private

Definition at line 211 of file enzyme.cpp.

218  {
219  p_x_replace_wildcard = &m_wildCardX;
220  }
221  else if(wildcard == 'B')
222  {
223  p_x_replace_wildcard = &m_wildCardB;
224  }
225  else if(wildcard == 'Z')
226  {
227  p_x_replace_wildcard = &m_wildCardZ;
228  }
229 
230  if(p_x_replace_wildcard != nullptr)
231  {
232  for(std::string orig_peptide : old_peptide_variant_list)
233  {
234  for(char replace : *p_x_replace_wildcard)
235  {
236  orig_peptide[position] = replace;
237  p_peptide_variant_list->push_back(orig_peptide);
238  }
239  }
240  }
241  else
242  {
243  throw ExceptionNotPossible(
244  QObject::tr("x_replace_wildcard is empty"));
245  }
246  // new_peptide[position] = 'A';
247  // p_peptide_variant_list->push_back(new_peptide);
248  // p_peptide_variant_list->resize(1);
249  // std::cerr << "Enzyme::replaceWildcards begin
250  // p_peptide_variant_list.size()=" << p_peptide_variant_list->size()
251  // <<
252  // endl;
253  break;
254  }
255  }
256  std::vector<std::string>().swap(
257  old_peptide_variant_list); // clear old_peptide_variant_list reallocating
258 
259 
260  qDebug() << "Enzyme::replaceWildcards end " << new_peptide.c_str();
261 }
262 
263 void
264 Enzyme::setTakeOnlyFirstWildcard(bool take_only_first_wildcard)
265 {
266  m_takeOnlyFirstWildcard = take_only_first_wildcard;
267 }
268 
269 
270 void
271 Enzyme::sanityCheck(EnzymeProductInterface &enzyme_product,
272  std::int8_t sequence_database_id,
273  const ProteinSp &protein_sp,
274  bool is_decoy,
275  const PeptideStr &peptide,
276  unsigned int start,
277  bool is_nter,
278  unsigned int missed_cleavage_number,
279  bool semi_enzyme) const
280 {
281  if(peptide.contains('X') || peptide.contains('B') || peptide.contains('Z'))
282  {

References m_wildCardX.

Referenced by setTakeOnlyFirstWildcard().

◆ sanityCheck()

void pappso::Enzyme::sanityCheck ( EnzymeProductInterface enzyme_product,
std::int8_t  sequence_database_id,
const ProteinSp protein_sp,
bool  is_decoy,
const PeptideStr peptide,
unsigned int  start,
bool  is_nter,
unsigned int  missed_cleavage_number,
bool  semi_enzyme 
) const
private

Definition at line 292 of file enzyme.cpp.

293  {
294  peptide_variant_list.resize(m_maxPeptideVariantListSize);
295  peptide_variant_list.shrink_to_fit();
296  }
297  }
298 
299  // peptide_variant_list.resize(2);
301  {
302  enzyme_product.setPeptide(sequence_database_id,
303  protein_sp,
304  is_decoy,
305  QString(peptide_variant_list.at(0).c_str()),
306  start,
307  is_nter,
308  missed_cleavage_number,
309  semi_enzyme);
310  }
311  else
312  {
313  std::string peptide_variant = peptide_variant_list.back();
314  while(peptide_variant_list.size() > 0)
315  {
316  enzyme_product.setPeptide(sequence_database_id,
317  protein_sp,
318  is_decoy,
319  QString(peptide_variant.c_str()),
320  start,
321  is_nter,
322  missed_cleavage_number,
323  semi_enzyme);
324  peptide_variant_list.pop_back();
325  if(peptide_variant_list.size() > 0)
326  {
327  peptide_variant = peptide_variant_list.back();
328  }
329  }
330  }
331  std::vector<std::string>().swap(
332  peptide_variant_list); // clear peptide_variant_list reallocating
333  }
334  else
335  {
336  enzyme_product.setPeptide(sequence_database_id,
337  protein_sp,
338  is_decoy,
339  peptide,
340  start,
341  is_nter,
342  missed_cleavage_number,
343  semi_enzyme);
344  }
345 }
346 
347 const QRegExp &
349 {
350  return m_recognitionSite;
351 }
352 } // namespace pappso

References m_maxPeptideVariantListSize.

◆ setMaxPeptideVariantListSize()

void pappso::Enzyme::setMaxPeptideVariantListSize ( std::size_t  max_peptide_variant_list_size)

if there are wildcards in the protein sequence : restrict the number of possible peptide sequences

Parameters
max_peptide_variant_list_size: maximum number of peptide variants (default is 100)

Definition at line 102 of file enzyme.cpp.

113  {

◆ setMiscleavage()

void pappso::Enzyme::setMiscleavage ( unsigned int  miscleavage)

sets the maximum number of missed cleavage allowed in the digestion

Parameters
miscleavage: maximum number of missed cleavages to allow (default is 0)

Definition at line 92 of file enzyme.cpp.

◆ setTakeOnlyFirstWildcard()

void pappso::Enzyme::setTakeOnlyFirstWildcard ( bool  take_only_first_wildcard)

take only the first possibility when the sequence contains X, B or Z wildcards

Parameters
take_only_first_wildcard: true to take only the first possibility if there are X, B or Z wildcards in the sequence

Definition at line 285 of file enzyme.cpp.

References replaceWildcards().

Member Data Documentation

◆ m_maxPeptideVariantListSize

std::size_t pappso::Enzyme::m_maxPeptideVariantListSize = 100
private

Definition at line 135 of file enzyme.h.

Referenced by sanityCheck().

◆ m_miscleavage

unsigned int pappso::Enzyme::m_miscleavage = 0
private

Definition at line 132 of file enzyme.h.

Referenced by Enzyme().

◆ m_recognitionSite

QRegExp pappso::Enzyme::m_recognitionSite
private

example with a kinase == [K,R]

Definition at line 131 of file enzyme.h.

Referenced by eat().

◆ m_takeOnlyFirstWildcard

bool pappso::Enzyme::m_takeOnlyFirstWildcard = false
private

Definition at line 133 of file enzyme.h.

◆ m_wildCardB

std::vector<char> pappso::Enzyme::m_wildCardB
private

Definition at line 139 of file enzyme.h.

◆ m_wildCardX

std::vector<char> pappso::Enzyme::m_wildCardX
private

Definition at line 138 of file enzyme.h.

Referenced by replaceWildcards().

◆ m_wildCardZ

std::vector<char> pappso::Enzyme::m_wildCardZ
private

Definition at line 140 of file enzyme.h.


The documentation for this class was generated from the following files:
pappso::Enzyme::setTakeOnlyFirstWildcard
void setTakeOnlyFirstWildcard(bool take_only_first_wildcard)
take only first m_takeOnlyFirstWildcard
Definition: enzyme.cpp:285
pappso::Enzyme::m_miscleavage
unsigned int m_miscleavage
Definition: enzyme.h:132
pappso::Enzyme::replaceWildcards
void replaceWildcards(std::vector< std::string > *p_peptide_variant_list) const
Definition: enzyme.cpp:211
pappso::PeptideStr
QString PeptideStr
A type definition for PeptideStr.
Definition: types.h:64
pappso::Enzyme::setMaxPeptideVariantListSize
void setMaxPeptideVariantListSize(std::size_t max_peptide_variant_list_size)
if there are wildcards in the protein sequence : restrict the number of possible peptide sequences
Definition: enzyme.cpp:102
pappso::Enzyme::m_wildCardZ
std::vector< char > m_wildCardZ
Definition: enzyme.h:140
pappso::Enzyme::~Enzyme
~Enzyme()
Definition: enzyme.cpp:87
pappso::Enzyme::getMiscleavage
unsigned int getMiscleavage() const
get the maximum number of missed cleavage allowed in the digestion
Definition: enzyme.cpp:97
pappso::Enzyme::m_wildCardB
std::vector< char > m_wildCardB
Definition: enzyme.h:139
pappso::Enzyme::m_maxPeptideVariantListSize
std::size_t m_maxPeptideVariantListSize
Definition: enzyme.h:135
pappso::ProteinSp
std::shared_ptr< const Protein > ProteinSp
shared pointer on a Protein object
Definition: protein.h:64
pappso::Enzyme::m_takeOnlyFirstWildcard
bool m_takeOnlyFirstWildcard
Definition: enzyme.h:133
pappso::Enzyme::m_recognitionSite
QRegExp m_recognitionSite
example with a kinase == [K,R]
Definition: enzyme.h:131
pappso::Enzyme::m_wildCardX
std::vector< char > m_wildCardX
Definition: enzyme.h:138
pappso::Enzyme::getQRegExpRecognitionSite
const QRegExp & getQRegExpRecognitionSite() const
Definition: enzyme.cpp:369
pappso::Enzyme::sanityCheck
void sanityCheck(EnzymeProductInterface &enzyme_product, std::int8_t sequence_database_id, const ProteinSp &protein_sp, bool is_decoy, const PeptideStr &peptide, unsigned int start, bool is_nter, unsigned int missed_cleavage_number, bool semi_enzyme) const
Definition: enzyme.cpp:292