 |
OpenMS
2.4.0
|
Go to the documentation of this file.
37 #include <OpenMS/config.h>
94 template <
class HitType>
104 higher_score_better(higher_score_better_)
109 if (higher_score_better)
111 return hit.getScore() >= score;
113 return hit.getScore() <= score;
122 template <
class HitType>
134 throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"The cut-off value for rank filtering must not be zero!");
140 Size hit_rank = hit.getRank();
145 return hit_rank <= rank;
154 template <
class HitType>
170 if (found.
isEmpty())
return false;
171 if (value.
isEmpty())
return true;
172 return found == value;
177 template <
class HitType>
193 if (found.
isEmpty())
return false;
194 return double(found) <= value;
199 template <
class HitType>
207 target_decoy(
"target_decoy",
"decoy"), is_decoy(
"isDecoy",
"true")
215 return target_decoy(hit) || is_decoy(hit);
224 template <
class HitType>
232 accessions(accessions_)
238 for (std::set<String>::iterator it = present_accessions.begin();
239 it != present_accessions.end(); ++it)
241 if (accessions.count(*it) > 0)
return true;
262 template <
class HitType,
class Entry>
271 for(
typename std::vector<Entry>::iterator rec_it = records.begin();
272 rec_it != records.end(); ++rec_it)
274 items[getKey(*rec_it)] = &(*rec_it);
287 return items.count(getHitKey(hit)) > 0;
297 if(!exists(evidence)){
298 throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"Accession: '"+ getHitKey(evidence) +
"'. peptide evidence accession not in data");
300 return *(items.find(getHitKey(evidence))->second);
317 struct HasMinPeptideLength;
323 struct HasLowMZError;
330 struct HasMatchingModification;
337 struct HasMatchingSequence;
340 struct HasNoEvidence;
358 digestion_(digestion), min_cleavages_(min), max_cleavages_(max)
369 [&](
const Int missed_cleavages)
372 bool max_filter = max_cleavages_ != disabledValue() ?
373 missed_cleavages > max_cleavages_ :
false;
374 bool min_filter = min_cleavages_ != disabledValue() ?
375 missed_cleavages < min_cleavages_ :
false;
376 return max_filter || min_filter;
382 hits.erase(std::remove_if(hits.begin(), hits.end(), (*this)), hits.end());
405 bool ignore_missed_cleavages,
406 bool methionine_cleavage) :
407 accession_resolver_(entries),
408 digestion_(digestion),
409 ignore_missed_cleavages_(ignore_missed_cleavages),
410 methionine_cleavage_(methionine_cleavage)
421 if (accession_resolver_.
exists(evidence))
425 evidence.
getStart(), evidence.
getEnd() - evidence.
getStart(), ignore_missed_cleavages_, methionine_cleavage_);
431 LOG_WARN <<
"Peptide accession not available! Skipping Evidence." << std::endl;
436 <<
"' not found in fasta file!" << std::endl;
444 IDFilter::FilterPeptideEvidences<IDFilter::DigestionFilter>(*
this,peptides);
457 template <
class IdentificationType>
464 return id.getHits().empty();
491 template <
class Container,
class Predicate>
494 items.erase(std::remove_if(items.begin(), items.end(), pred),
499 template <
class Container,
class Predicate>
502 items.erase(std::remove_if(items.begin(), items.end(), std::not1(pred)),
513 template <
class IdentificationType>
517 for (
typename std::vector<IdentificationType>::const_iterator id_it =
518 ids.begin(); id_it != ids.end(); ++id_it)
520 counter += id_it->getHits().size();
537 template <
class IdentificationType>
539 const std::vector<IdentificationType>& identifications,
540 bool assume_sorted,
typename IdentificationType::HitType& best_hit)
542 if (identifications.empty())
return false;
544 typename std::vector<IdentificationType>::const_iterator best_id_it =
545 identifications.end();
546 typename std::vector<typename IdentificationType::HitType>::const_iterator
549 for (
typename std::vector<IdentificationType>::const_iterator id_it =
550 identifications.begin(); id_it != identifications.end(); ++id_it)
552 if (id_it->getHits().empty())
continue;
554 if (best_id_it == identifications.end())
557 best_hit_it = id_it->getHits().begin();
559 else if (best_id_it->getScoreType() != id_it->getScoreType())
561 throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"Can't compare scores of different types", best_id_it->getScoreType() +
"/" + id_it->getScoreType());
564 bool higher_better = best_id_it->isHigherScoreBetter();
565 for (
typename std::vector<typename IdentificationType::HitType>::
566 const_iterator hit_it = id_it->getHits().begin(); hit_it !=
567 id_it->getHits().end(); ++hit_it)
569 if ((higher_better && (hit_it->getScore() >
570 best_hit_it->getScore())) ||
571 (!higher_better && (hit_it->getScore() <
572 best_hit_it->getScore())))
574 best_hit_it = hit_it;
576 if (assume_sorted)
break;
580 if (best_id_it == identifications.end())
585 best_hit = *best_hit_it;
596 static void extractPeptideSequences(
597 const std::vector<PeptideIdentification>& peptides,
598 std::set<String>& sequences,
bool ignore_mods =
false);
606 template<
class EvidenceFilter>
608 EvidenceFilter& filter,
609 std::vector<PeptideIdentification>& peptides)
611 for(std::vector<PeptideIdentification>::iterator pep_it = peptides.begin();
612 pep_it != peptides.end(); ++pep_it)
614 for(std::vector<PeptideHit>::iterator hit_it = pep_it->getHits().begin();
615 hit_it != pep_it->getHits().end(); ++hit_it )
617 std::vector<PeptideEvidence> evidences;
618 remove_copy_if(hit_it->getPeptideEvidences().begin(),
619 hit_it->getPeptideEvidences().end(),
620 back_inserter(evidences),
622 hit_it->setPeptideEvidences(evidences);
635 template <
class IdentificationType>
638 for (
typename std::vector<IdentificationType>::iterator it = ids.begin();
639 it != ids.end(); ++it)
646 static void removeUnreferencedProteins(
647 std::vector<ProteinIdentification>& proteins,
648 const std::vector<PeptideIdentification>& peptides);
657 static void updateProteinReferences(
658 std::vector<PeptideIdentification>& peptides,
659 const std::vector<ProteinIdentification>& proteins,
660 bool remove_peptides_without_reference =
false);
670 static bool updateProteinGroups(
671 std::vector<ProteinIdentification::ProteinGroup>& groups,
672 const std::vector<ProteinHit>& hits);
681 template <
class IdentificationType>
684 struct HasNoHits<IdentificationType> empty_filter;
685 removeMatchingItems(ids, empty_filter);
693 template <
class IdentificationType>
695 double threshold_score)
697 for (
typename std::vector<IdentificationType>::iterator id_it =
698 ids.begin(); id_it != ids.end(); ++id_it)
700 struct HasGoodScore<typename IdentificationType::HitType> score_filter(
701 threshold_score, id_it->isHigherScoreBetter());
702 keepMatchingItems(id_it->getHits(), score_filter);
711 template <class IdentificationType>
712 static void filterHitsBySignificance(std::vector<IdentificationType>& ids,
713 double threshold_fraction = 1.0)
715 for (
typename std::vector<IdentificationType>::iterator id_it =
716 ids.begin(); id_it != ids.end(); ++id_it)
718 double threshold_score = (threshold_fraction *
719 id_it->getSignificanceThreshold());
720 struct HasGoodScore<typename IdentificationType::HitType> score_filter(
721 threshold_score, id_it->isHigherScoreBetter());
722 keepMatchingItems(id_it->getHits(), score_filter);
731 template <class IdentificationType>
732 static void keepNBestHits(std::vector<IdentificationType>& ids, Size n)
734 for (
typename std::vector<IdentificationType>::iterator id_it =
735 ids.begin(); id_it != ids.end(); ++id_it)
738 if (n < id_it->getHits().size()) id_it->getHits().resize(n);
756 template <
class IdentificationType>
763 struct HasMaxRank<typename IdentificationType::HitType>
764 rank_filter(min_rank - 1);
765 for (typename std::vector<IdentificationType>::iterator id_it =
766 ids.begin(); id_it != ids.end(); ++id_it)
768 removeMatchingItems(id_it->getHits(), rank_filter);
771 if (max_rank >= min_rank)
773 struct HasMaxRank<typename IdentificationType::HitType>
774 rank_filter(max_rank);
775 for (typename std::vector<IdentificationType>::iterator id_it =
776 ids.begin(); id_it != ids.end(); ++id_it)
778 keepMatchingItems(id_it->getHits(), rank_filter);
790 template <
class IdentificationType>
795 for (typename std::vector<IdentificationType>::iterator id_it =
796 ids.begin(); id_it != ids.end(); ++id_it)
798 removeMatchingItems(id_it->getHits(), decoy_filter);
809 template <
class IdentificationType>
811 const std::set<String> accessions)
814 acc_filter(accessions);
815 for (typename std::vector<IdentificationType>::iterator id_it =
816 ids.begin(); id_it != ids.end(); ++id_it)
818 removeMatchingItems(id_it->getHits(), acc_filter);
829 template <
class IdentificationType>
831 const std::set<String> accessions)
834 acc_filter(accessions);
835 for (typename std::vector<IdentificationType>::iterator id_it =
836 ids.begin(); id_it != ids.end(); ++id_it)
838 keepMatchingItems(id_it->getHits(), acc_filter);
856 static void keepBestPeptideHits(
857 std::vector<PeptideIdentification>& peptides,
bool strict =
false);
867 static void filterPeptidesByLength(
868 std::vector<PeptideIdentification>& peptides,
Size min_length,
869 Size max_length = UINT_MAX);
879 static void filterPeptidesByCharge(
880 std::vector<PeptideIdentification>& peptides,
Int min_charge,
884 static void filterPeptidesByRT(std::vector<PeptideIdentification>& peptides,
885 double min_rt,
double max_rt);
888 static void filterPeptidesByMZ(std::vector<PeptideIdentification>& peptides,
889 double min_mz,
double max_mz);
902 static void filterPeptidesByMZError(
903 std::vector<PeptideIdentification>& peptides,
double mass_error,
913 template <
class Filter>
914 static void filterPeptideEvidences(
916 std::vector<PeptideIdentification>& peptides);
929 static void filterPeptidesByRTPredictPValue(
930 std::vector<PeptideIdentification>& peptides,
931 const String& metavalue_key,
double threshold = 0.05);
934 static void removePeptidesWithMatchingModifications(
935 std::vector<PeptideIdentification>& peptides,
936 const std::set<String>& modifications);
939 static void keepPeptidesWithMatchingModifications(
940 std::vector<PeptideIdentification>& peptides,
941 const std::set<String>& modifications);
950 static void removePeptidesWithMatchingSequences(
951 std::vector<PeptideIdentification>& peptides,
952 const std::vector<PeptideIdentification>& bad_peptides,
953 bool ignore_mods =
false);
962 static void keepPeptidesWithMatchingSequences(
963 std::vector<PeptideIdentification>& peptides,
964 const std::vector<PeptideIdentification>& good_peptides,
965 bool ignore_mods =
false);
968 static void keepUniquePeptidesPerProtein(std::vector<PeptideIdentification>&
976 static void removeDuplicatePeptideHits(std::vector<PeptideIdentification>&
977 peptides,
bool seq_only =
false);
987 double peptide_threshold_score,
988 double protein_threshold_score)
992 protein_threshold_score);
998 exp_it != experiment.
end(); ++exp_it)
1000 filterHitsByScore(exp_it->getPeptideIdentifications(),
1001 peptide_threshold_score);
1002 removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1003 updateProteinReferences(exp_it->getPeptideIdentifications(),
1011 double peptide_threshold_fraction,
1012 double protein_threshold_fraction)
1016 protein_threshold_fraction);
1022 exp_it != experiment.
end(); ++exp_it)
1024 filterHitsBySignificance(exp_it->getPeptideIdentifications(),
1025 peptide_threshold_fraction);
1026 removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1027 updateProteinReferences(exp_it->getPeptideIdentifications(),
1038 std::vector<PeptideIdentification> all_peptides;
1042 exp_it != experiment.
end(); ++exp_it)
1044 std::vector<PeptideIdentification>& peptides =
1045 exp_it->getPeptideIdentifications();
1046 keepNBestHits(peptides, n);
1047 removeEmptyIdentifications(peptides);
1048 updateProteinReferences(peptides,
1050 all_peptides.insert(all_peptides.end(), peptides.begin(),
1061 const std::vector<FASTAFile::FASTAEntry>& proteins)
1063 std::set<String> accessions;
1064 for (std::vector<FASTAFile::FASTAEntry>::const_iterator it =
1065 proteins.begin(); it != proteins.end(); ++it)
1067 accessions.insert(it->identifier);
1077 exp_it != experiment.
end(); ++exp_it)
1079 if (exp_it->getMSLevel() == 2)
1081 keepHitsMatchingProteins(exp_it->getPeptideIdentifications(),
1083 removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1084 updateHitRanks(exp_it->getPeptideIdentifications());
OpenMS identification format (.idXML)
Definition: FileTypes.h:66
HasGoodScore(double score_, bool higher_score_better_)
Definition: IDFilter.h:102
Base class for TOPP applications.
Definition: TOPPBase.h:150
void store(const String &filename, const std::vector< ProteinIdentification > &poid, const std::vector< PeptideIdentification > &peid) const
Stores the identifications in a MzIdentML file.
Used to load (storing not supported, yet) ProtXML files.
Definition: ProtXMLFile.h:70
static void removeMatchingItems(Container &items, const Predicate &pred)
Remove items that satisfy a condition from a container (e.g. vector)
Definition: IDFilter.h:492
Representation of a Sequest output file.
Definition: SequestOutfile.h:61
ConstIterator end() const
Gives access to the underlying text buffer.
void load(const String &filename, ProteinIdentification &protein_identification, std::vector< PeptideIdentification > &id_data, ModificationDefinitionsSet &mod_def_set)
loads data from an X! Tandem XML file
Used to load Mascot XML files.
Definition: MascotXMLFile.h:57
static const std::string NamesOfSpecificity[SIZE_OF_SPECIFICITY]
Names of the Specificity.
Definition: EnzymaticDigestion.h:74
Definition: PercolatorOutfile.h:58
static void keepUniquePeptidesPerProtein(std::vector< PeptideIdentification > &peptides)
Removes all peptides that are not annotated as unique for a protein (by PeptideIndexer)
void load(const String &filename, ProteinIdentification &protein_ids, PeptideIdentification &peptide_ids)
Loads the identifications of a ProtXML file without identifier.
EnzymaticDigestion & digestion_
Definition: IDFilter.h:351
static void keepNBestHits(std::vector< IdentificationType > &ids, Size n)
Filters peptide or protein identifications according to the score of the hits, keeping the n best hit...
Definition: IDFilter.h:732
A method or algorithm argument contains illegal values.
Definition: Exception.h:648
static const std::string score_type_names[SIZE_OF_SCORETYPE]
Names of Percolator scores (to match ScoreType)
Definition: PercolatorOutfile.h:61
const AASequence & getSequence() const
returns the peptide sequence without trailing or following spaces
bool operator()(const HitType &hit) const
Definition: IDFilter.h:138
void setEnzyme(const String &name)
Sets the enzyme for the digestion (by name)
static void filterHitsBySignificance(std::vector< IdentificationType > &ids, double threshold_fraction=1.0)
Filters peptide or protein identifications according to the significance threshold of the hits.
Definition: IDFilter.h:712
bool operator()(const HitType &hit) const
Definition: IDFilter.h:107
IdentificationType argument_type
Definition: IDFilter.h:460
double score
Definition: IDFilter.h:99
String identifier
Definition: FASTAFile.h:78
static ProteaseDB * getInstance()
this member function serves as a replacement of the constructor
Definition: DigestionEnzymeDB.h:70
Representation of a set of modification definitions.
Definition: ModificationDefinitionsSet.h:58
static void keepHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > accessions)
Filters peptide or protein identifications according to the given proteins (positive).
Definition: IDFilter.h:830
static void removeHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > accessions)
Filters peptide or protein identifications according to the given proteins (negative).
Definition: IDFilter.h:810
const String & getHitKey(const PeptideEvidence &p) const
Definition: IDFilter.h:290
Invalid value exception.
Definition: Exception.h:335
This class serves for reading in and writing FASTA files.
Definition: FASTAFile.h:64
static FileTypes::Type getTypeByFileName(const String &filename)
Determines the file type from a file name.
static void filterPeptidesByMZ(std::vector< PeptideIdentification > &peptides, double min_mz, double max_mz)
Filters peptide identifications by precursor m/z, keeping only IDs in the given range.
MzML file (.mzML)
Definition: FileTypes.h:72
void setValue(const String &key, const DataValue &value, const String &description="", const StringList &tags=StringList())
Sets a value.
TPP pepXML file (.pepXML)
Definition: FileTypes.h:75
void setSearchEngineVersion(const String &search_engine_version)
Sets the search engine version.
void load(const String &filename, std::vector< PeptideIdentification > &pep_ids, std::vector< ProteinIdentification > &prot_ids)
Load the content of the xquest.xml file into the provided data structures.
A more convenient string class.
Definition: String.h:57
Iterator begin()
Definition: MSExperiment.h:157
PeptideHit argument_type
Definition: IDFilter.h:356
bool exists(const HitType &hit) const
Definition: IDFilter.h:285
static ModificationsDB * getInstance(OpenMS::String unimod_file="CHEMISTRY/unimod.xml", OpenMS::String psimod_file="CHEMISTRY/PSI-MOD.obo", OpenMS::String xlmod_file="CHEMISTRY/XLMOD.obo")
Returns a pointer to the modifications DB (singleton)
Definition: ModificationsDB.h:77
Class for the enzymatic digestion of sequences.
Definition: EnzymaticDigestion.h:62
String & trim()
removes whitespaces (space, tab, line feed, carriage return) at the beginning and the end of the stri...
bool isEmpty() const
Test if the value is empty.
Definition: DataValue.h:363
Used to load OMSSAXML files.
Definition: OMSSAXMLFile.h:60
HasMatchingAccession(const std::set< String > &accessions_)
Definition: IDFilter.h:231
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:77
Invalid conversion exception.
Definition: Exception.h:362
static void updateProteinReferences(std::vector< PeptideIdentification > &peptides, const std::vector< ProteinIdentification > &proteins, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
void getAllNames(std::vector< String > &all_names) const
returns all the enzyme names (does NOT include synonym names)
Definition: DigestionEnzymeDB.h:122
This class provides some basic file handling methods for text files.
Definition: TextFile.h:46
#define LOG_ERROR
Macro to be used if non-fatal errors are reported (processing continues)
Definition: LogStream.h:446
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:413
void addMSLevel(int level)
adds a desired MS level for peaks to load
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
Representation of a protein hit.
Definition: ProteinHit.h:53
Size rank
Definition: IDFilter.h:127
void load(const String &filename, ProteinIdentification &proteins, std::vector< PeptideIdentification > &peptides, SpectrumMetaDataLookup &lookup, enum ScoreType output_score=QVALUE)
Loads a Percolator output file.
#define LOG_INFO
Macro to be used if information, e.g. a status, should be reported.
Definition: LogStream.h:454
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:246
static bool updateProteinGroups(std::vector< ProteinIdentification::ProteinGroup > &groups, const std::vector< ProteinHit > &hits)
Update protein groups after protein hits were filtered.
msInspect file (.tsv)
Definition: FileTypes.h:87
GetMatchingItems()
Definition: IDFilter.h:278
int exception
(Used by various macros. Indicates a rough category of the exception being caught....
ScoreType
Types of Percolator scores.
Definition: PercolatorOutfile.h:58
String toString() const
returns the peptide as string with modifications embedded in brackets
mzIdentML (HUPO PSI AnalysisXML followup format) (.mzid)
Definition: FileTypes.h:77
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
static Int disabledValue()
Definition: IDFilter.h:361
String toUnmodifiedString() const
returns the peptide as string without any modifications
GetMatchingItems(std::vector< Entry > &records)
Definition: IDFilter.h:269
PeakFileOptions & getOptions()
Mutable access to the options for loading/storing.
Class for reading Percolator tab-delimited output files.
Definition: PercolatorOutfile.h:52
PeptideEvidence argument_type
Definition: IDFilter.h:395
Unknown file extension.
Definition: FileTypes.h:60
static Specificity getSpecificityByName(const String &name)
FASTA file (.fasta)
Definition: FileTypes.h:92
Representation of a protein identification run.
Definition: ProteinIdentification.h:61
void endProgress() const
Ends the progress display.
static void keepPeptidesWithMatchingModifications(std::vector< PeptideIdentification > &peptides, const std::set< String > &modifications)
Keeps only peptide hits that have at least one of the given modifications.
FASTA entry type (identifier, description and sequence)
Definition: FASTAFile.h:76
static void filterHitsByRank(std::vector< IdentificationType > &ids, Size min_rank, Size max_rank)
Filters peptide or protein identifications according to the ranking of the hits.
Definition: IDFilter.h:757
void setMissedCleavages(Size missed_cleavages)
Sets the number of missed cleavages for the digestion (default is 0). This setting is ignored when lo...
Size size() const
Definition: MSExperiment.h:127
const std::vector< ProteinIdentification > & getProteinIdentifications() const
returns a const reference to the protein ProteinIdentification vector
void setHits(const std::vector< PeptideHit > &hits)
Sets the peptide hits.
fully enzyme specific, e.g., tryptic (ends with KR, AA-before is KR), or peptide is at protein termin...
Definition: EnzymaticDigestion.h:68
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:225
std::map< String, Entry * > ItemMap
Definition: IDFilter.h:266
void load(const String &filename, std::vector< ProteinIdentification > &protein_ids, std::vector< PeptideIdentification > &peptide_ids)
Loads the identifications of an idXML file without identifier.
void load(const String &result_filename, std::vector< PeptideIdentification > &peptide_identifications, ProteinIdentification &protein_identification, const double p_value_threshold, std::vector< double > &pvalues, const String &database="", const bool ignore_proteins_per_peptide=false)
loads data from a Sequest outfile
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:347
static void removeUnreferencedProteins(std::vector< ProteinIdentification > &proteins, const std::vector< PeptideIdentification > &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
static String concatenate(const std::vector< T > &container, const String &glue="")
Concatenates all elements of the container and puts the glue string between elements.
Definition: ListUtils.h:175
static void keepMatchingItems(Container &items, const Predicate &pred)
Keep items that satisfy a condition in a container (e.g. vector), removing all others.
Definition: IDFilter.h:500
bool operator()(const HitType &hit) const
Definition: IDFilter.h:210
Aligns the peaks of two sorted spectra Method 1: Using a banded (width via 'tolerance' parameter) ali...
Definition: SpectrumAlignment.h:65
const std::set< String > & accessions
Definition: IDFilter.h:229
static enum ScoreType getScoreType(String score_type_name)
Return a score type given its name.
bool methionine_cleavage_
Definition: IDFilter.h:401
File adapter for MzIdentML files.
Definition: MzIdentMLFile.h:67
int Int
Signed integer type.
Definition: Types.h:102
static void removeDuplicatePeptideHits(std::vector< PeptideIdentification > &peptides, bool seq_only=false)
Removes duplicate peptide hits from each peptide identification, keeping only unique hits (per ID).
const String & getAccession() const
returns the accession of the protein
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
std::vector< SpectrumType >::iterator Iterator
Mutable iterator.
Definition: MSExperiment.h:111
static void filterPeptidesByLength(std::vector< PeptideIdentification > &peptides, Size min_length, Size max_length=UINT_MAX)
Filters peptide identifications according to peptide sequence length.
void filterPeptideSequences(std::vector< PeptideHit > &hits)
Definition: IDFilter.h:380
HitType argument_type
Definition: IDFilter.h:227
Used to load and store xQuest result files.
Definition: XQuestResultXMLFile.h:55
HasMaxRank(Size rank_)
Definition: IDFilter.h:129
std::vector< String > variable_modifications
Allowed variable modifications.
Definition: ProteinIdentification.h:112
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:54
void load(const String &filename, ProteinIdentification &protein_identification, std::vector< PeptideIdentification > &id_data, const SpectrumMetaDataLookup &lookup)
Loads data from a Mascot XML file.
#define LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:450
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:56
Size findByNativeID(const String &native_id) const
Look up spectrum by native ID.
const String & getKey(const FASTAFile::FASTAEntry &entry) const
Definition: IDFilter.h:280
static void filterPeptidesByRTPredictPValue(std::vector< PeptideIdentification > &peptides, const String &metavalue_key, double threshold=0.05)
Filters peptide identifications according to p-values from RTPredict.
bool hasValidLimits() const
start and end numbers in evidence represent actual numeric indices
static void keepNBestHits(PeakMap &experiment, Size n)
Filters an MS/MS experiment by keeping the N best peptide hits for every spectrum.
Definition: IDFilter.h:1034
static bool isDirectory(const String &path)
Return true if the given path specifies a directory.
Facilitates file handling by file type recognition.
Definition: FileHandler.h:62
Helper class for looking up spectra based on different attributes.
Definition: SpectrumLookup.h:67
void addIonMatchStatistics(PeptideIdentification &pi, MSSpectrum &spec, const TheoreticalSpectrumGenerator &tg, const SpectrumAlignment &sa) const
Adds ion match statistics to pi PeptideIdentification.
static void keepBestPeptideHits(std::vector< PeptideIdentification > &peptides, bool strict=false)
Filters peptide identifications keeping only the single best-scoring hit per ID.
Collection of functions for filtering peptide and protein identifications.
Definition: IDFilter.h:75
bool loadExperiment(const String &filename, MSExperiment &exp, FileTypes::Type force_type=FileTypes::UNKNOWN, ProgressLogger::LogType log=ProgressLogger::NONE, const bool rewrite_source_file=true, const bool compute_hash=true)
Loads a file into an MSExperiment.
HitType argument_type
Definition: IDFilter.h:265
Type
Actual file types enum.
Definition: FileTypes.h:58
Representation of a peptide evidence.
Definition: PeptideEvidence.h:50
xQuest XML file format for protein-protein cross-link identifications (.xquest.xml)
Definition: FileTypes.h:111
static void filterPeptidesByCharge(std::vector< PeptideIdentification > &peptides, Int min_charge, Int max_charge)
Filters peptide identifications according to charge state.
void readSpectra(const SpectrumContainer &spectra, const String &scan_regexp=default_scan_regexp)
Read and index spectra for later look-up.
Definition: SpectrumLookup.h:103
HitType argument_type
Definition: IDFilter.h:202
static void filterPeptidesByRT(std::vector< PeptideIdentification > &peptides, double min_rt, double max_rt)
Filters peptide identifications by precursor RT, keeping only IDs in the given range.
void addReferenceFormat(const String &regexp)
Register a possible format for a spectrum reference.
void filterPeptideEvidences(std::vector< PeptideIdentification > &peptides)
Definition: IDFilter.h:442
bool filterByMissedCleavages(const String &sequence, std::function< bool(const Int)> filter) const
Filter based on the number of missed cleavages.
static FileTypes::Type getType(const String &filename)
Tries to determine the file type (by name or content)
static void updateHitRanks(std::vector< IdentificationType > &ids)
Updates the hit ranks on all peptide or protein IDs.
Definition: IDFilter.h:636
const Entry & getValue(const PeptideEvidence &evidence) const
Definition: IDFilter.h:295
static void filterHitsBySignificance(PeakMap &experiment, double peptide_threshold_fraction, double protein_threshold_fraction)
Filters an MS/MS experiment according to fractions of the significance thresholds.
Definition: IDFilter.h:1010
void setParameters(const Param &param)
Sets the parameters.
static void keepPeptidesWithMatchingSequences(std::vector< PeptideIdentification > &peptides, const std::vector< PeptideIdentification > &good_peptides, bool ignore_mods=false)
Removes all peptide hits with a sequence that does not match one in good_peptides.
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:251
const Param & getDefaults() const
Non-mutable access to the default parameters.
HasDecoyAnnotation()
Definition: IDFilter.h:206
std::vector< String >::iterator Iterator
Mutable iterator.
Definition: TextFile.h:54
Mascot XML file format for peptide identifications (.xml)
Definition: FileTypes.h:84
HitType argument_type
Definition: IDFilter.h:125
std::set< String > extractProteinAccessionsSet() const
extracts the set of non-empty protein accessions from peptide evidences
static Size countHits(const std::vector< IdentificationType > &ids)
Returns the total number of peptide/protein hits in a vector of peptide/protein identifications.
Definition: IDFilter.h:514
bool find(TFinder &finder, const Pattern< TNeedle, FuzzyAC > &me, PatternAuxData< TNeedle > &dh)
Definition: AhoCorasickAmbiguous.h:884
OMSSA XML file format for peptide identifications (.xml)
Definition: FileTypes.h:83
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:73
bool higher_score_better
Definition: IDFilter.h:100
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
void setSequence(const AASequence &sequence)
sets the peptide sequence
bool operator()(PeptideHit &p)
Definition: IDFilter.h:365
void load(const String &filename, std::vector< ProteinIdentification > &poid, std::vector< PeptideIdentification > &peid)
Loads the identifications from a MzIdentML file.
TPP protXML file (.protXML)
Definition: FileTypes.h:76
bool isValidProduct(const String &protein, int pep_pos, int pep_length, bool ignore_missed_cleavages=true, bool allow_nterm_protein_cleavage=false, bool allow_random_asp_pro_cleavage=false) const
Variant of EnzymaticDigestion::isValidProduct() with support for n-term protein cleavage and random D...
Builds a map index of data that have a String index to find matches and return the objects.
Definition: IDFilter.h:263
int main(int argc, const char **argv)
Definition: INIFileEditor.cpp:73
static void initializeLookup(SpectrumMetaDataLookup &lookup, const PeakMap &experiment, const String &scan_regex="")
Initializes a helper object for looking up spectrum meta data (RT, m/z)
bool ignore_missed_cleavages_
Definition: IDFilter.h:400
void setSearchEngine(const String &search_engine)
Sets the search engine type.
Is peptide evidence digestion product of some protein.
Definition: IDFilter.h:393
ConstIterator begin() const
Gives access to the underlying text buffer.
void store(String filename, const std::vector< ProteinIdentification > &protein_ids, const std::vector< PeptideIdentification > &peptide_ids, const String &document_id="")
Stores the data in an idXML file.
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:134
Generates theoretical spectra with various options.
Definition: TheoreticalSpectrumGenerator.h:63
ItemMap items
Definition: IDFilter.h:267
Int getStart() const
get the position in the protein (starting at 0 for the N-terminus). If not available UNKNOWN_POSITION...
String getEnzymeName() const
Returns the enzyme for the digestion.
static String absolutePath(const String &file)
Replaces the relative path in the argument with the absolute path.
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:62
Parse Error exception.
Definition: Exception.h:622
void store(const String &filename, std::vector< ProteinIdentification > &protein_ids, std::vector< PeptideIdentification > &peptide_ids, const String &mz_file="", const String &mz_name="", bool peptideprophet_analyzed=false)
Stores idXML as PepXML file.
String substr(size_t pos=0, size_t n=npos) const
Wrapper for the STL substr() method. Returns a String object with its contents initialized to a subst...
static void load(const String &filename, std::vector< FASTAEntry > &data)
loads a FASTA file given by 'filename' and stores the information in 'data'
Is the list of hits of this peptide/protein ID empty?
Definition: IDFilter.h:458
Class for the enzymatic digestion of proteins.
Definition: ProteaseDigestion.h:60
static std::vector< PeptideHit > getReferencingHits(const std::vector< PeptideHit > &, const std::set< String > &accession)
returns all peptide hits which reference a given protein accession (i.e. filter by protein accessi...
Iterator end()
Definition: MSExperiment.h:167
static void filterPeptidesByMZError(std::vector< PeptideIdentification > &peptides, double mass_error, bool unit_ppm)
Filter peptide identifications according to mass deviation.
const std::vector< MSSpectrum > & getSpectra() const
returns the spectrum list
Definition: EnzymaticDigestion.h:71
Int max_cleavages_
Definition: IDFilter.h:353
void load(const String &filename, std::vector< ProteinIdentification > &proteins, std::vector< PeptideIdentification > &peptides, const String &experiment_name, const SpectrumMetaDataLookup &lookup)
Loads peptide sequences with modifications out of a PepXML file.
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:235
static void removeEmptyIdentifications(std::vector< IdentificationType > &ids)
Removes peptide or protein identifications that have no hits in them.
Definition: IDFilter.h:682
static Type nameToType(const String &name)
Converts a file type name into a Type.
static void removePeptidesWithMatchingModifications(std::vector< PeptideIdentification > &peptides, const std::set< String > &modifications)
Removes all peptide hits that have at least one of the given modifications.
const char * getMessage() const noexcept
Returns the message.
ProteaseDigestion & digestion_
Definition: IDFilter.h:399
Is the score of this hit at least as good as the given value?
Definition: IDFilter.h:95
HitType argument_type
Definition: IDFilter.h:97
Used to load XTandemXML files.
Definition: XTandemXMLFile.h:55
void getAllSearchModifications(std::vector< String > &modifications) const
Collects all modifications that can be used for identification searches.
DigestionFilter(std::vector< FASTAFile::FASTAEntry > &entries, ProteaseDigestion &digestion, bool ignore_missed_cleavages, bool methionine_cleavage)
Definition: IDFilter.h:403
Management and storage of parameters / INI files.
Definition: Param.h:74
static bool getBestHit(const std::vector< IdentificationType > &identifications, bool assume_sorted, typename IdentificationType::HitType &best_hit)
Finds the best-scoring hit in a vector of peptide or protein identifications.
Definition: IDFilter.h:538
static void removePeptidesWithMatchingSequences(std::vector< PeptideIdentification > &peptides, const std::vector< PeptideIdentification > &bad_peptides, bool ignore_mods=false)
Removes all peptide hits with a sequence that matches one in bad_peptides.
Filter Peptide Hit by its digestion product.
Definition: IDFilter.h:348
static void keepHitsMatchingProteins(PeakMap &experiment, const std::vector< FASTAFile::FASTAEntry > &proteins)
Filters an MS/MS experiment according to the given proteins.
Definition: IDFilter.h:1059
GetMatchingItems< PeptideEvidence, FASTAFile::FASTAEntry > accession_resolver_
Definition: IDFilter.h:398
Used to load and store PepXML files.
Definition: PepXMLFile.h:62
String & ensureLastChar(char end)
Makes sure the string ends with the character end.
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
static void filterHitsByScore(std::vector< IdentificationType > &ids, double threshold_score)
Filters peptide or protein identifications according to the score of the hits.
Definition: IDFilter.h:694
Int min_cleavages_
Definition: IDFilter.h:352
const String & getProteinAccession() const
get the protein accession the peptide matches to. If not available the empty string is returned.
void setSpecificity(Specificity spec)
Sets the specificity for the digestion (default is SPEC_FULL).
void setIdentifier(const String &id)
Sets the identifier.
static bool fileList(const String &dir, const String &file_pattern, StringList &output, bool full_path=false)
Retrieves a list of files matching file_pattern in directory dir (returns filenames without paths unl...
Specificity getSpecificity() const
Returns the specificity for the digestion.
Element could not be found exception.
Definition: Exception.h:662
static void removeDecoyHits(std::vector< IdentificationType > &ids)
Removes hits annotated as decoys from peptide or protein identifications.
Definition: IDFilter.h:791
void store(const String &filename, const std::vector< ProteinIdentification > &poid, const std::vector< PeptideIdentification > &peid) const
Stores the identifications in an xQuest XML file.
static void filterHitsByScore(PeakMap &experiment, double peptide_threshold_score, double protein_threshold_score)
Filters an MS/MS experiment according to score thresholds.
Definition: IDFilter.h:986
Search parameters of the DB search.
Definition: ProteinIdentification.h:103
void load(const String &filename, ProteinIdentification &protein_identification, std::vector< PeptideIdentification > &id_data, bool load_proteins=true, bool load_empty_hits=true)
loads data from an OMSSAXML file
Is this a decoy hit?
Definition: IDFilter.h:200
void load(const String &filename, bool trim_lines=false, Int first_n=-1, bool skip_empty_lines=false)
Loads data from a text file.
String toString(T i)
toString functions (single argument)
Definition: StringUtils.h:68
any XML format
Definition: FileTypes.h:98
Int getEnd() const
get the position of the last AA of the peptide in protein coordinates (starting at 0 for the N-termin...
void setLogType(LogType type) const
Sets the progress log that should be used. The default type is NONE!
Annotates spectra from identifications and theoretical spectra or identifications from spectra and th...
Definition: SpectrumAnnotator.h:60
Used to load and store idXML files.
Definition: IdXMLFile.h:63
Percolator tab-delimited output (PSM level)
Definition: FileTypes.h:106
Is the rank of this hit below or at the given cut-off?
Definition: IDFilter.h:123
static void FilterPeptideEvidences(EvidenceFilter &filter, std::vector< PeptideIdentification > &peptides)
remove peptide evidences based on a filter
Definition: IDFilter.h:607
PeptideDigestionFilter(EnzymaticDigestion &digestion, Int min, Int max)
Definition: IDFilter.h:357
Command line progress.
Definition: ProgressLogger.h:72
bool operator()(const IdentificationType &id) const
Definition: IDFilter.h:462
Representation of a peptide hit.
Definition: PeptideHit.h:54