 |
OpenMS
2.4.0
|
Go to the documentation of this file.
44 #include <boost/math/special_functions/fpclassify.hpp>
66 typedef std::map<std::pair<String, String>, std::vector<PeptideHit> >
MapAccPepType;
69 void store(
const String& filename,
const MzTab& mz_tab)
const;
72 void storeProteinReliabilityColumn(
bool store);
73 void storePeptideReliabilityColumn(
bool store);
74 void storePSMReliabilityColumn(
bool store);
75 void storeSmallMoleculeReliabilityColumn(
bool store);
76 void storeProteinUriColumn(
bool store);
77 void storePeptideUriColumn(
bool store);
78 void storePSMUriColumn(
bool store);
79 void storeSmallMoleculeUriColumn(
bool store);
80 void storeProteinGoTerms(
bool store);
100 String generateMzTabProteinHeader_(
const MzTabProteinSectionRow& reference_row,
const Size n_best_search_engine_scores,
const std::vector<String>& optional_columns)
const;
106 String generateMzTabPeptideHeader_(
Size search_ms_runs,
Size n_best_search_engine_scores,
Size n_search_engine_score,
Size assays,
Size study_variables,
const std::vector<String>& optional_columns)
const;
112 String generateMzTabPSMHeader_(
Size n_search_engine_scores,
const std::vector<String>& optional_columns)
const;
114 String generateMzTabPSMSectionRow_(
const MzTabPSMSectionRow& row,
const std::vector<String>& optional_columns)
const;
118 String generateMzTabSmallMoleculeHeader_(
Size search_ms_runs,
Size n_best_search_engine_scores,
Size n_search_engine_score,
Size assays,
Size study_variables,
const std::vector<String>& optional_columns)
const;
124 static std::pair<int, int> extractIndexPairsFromBrackets_(
const String& s);
126 static void sortPSM_(std::vector<PeptideIdentification>::iterator begin, std::vector<PeptideIdentification>::iterator end);
128 static void keepFirstPSM_(std::vector<PeptideIdentification>::iterator begin, std::vector<PeptideIdentification>::iterator end);
131 static void partitionIntoRuns_(
const std::vector<PeptideIdentification>& pep_ids,
132 const std::vector<ProteinIdentification>& pro_ids,
133 std::map<
String, std::vector<PeptideIdentification> >& map_run_to_pepids,
134 std::map<
String, std::vector<ProteinIdentification> >& map_run_to_proids
139 static void createProteinToPeptideLinks_(
const std::map<
String, std::vector<PeptideIdentification> >& map_run_to_pepids,
MapAccPepType& map_run_accession_to_pephits);
148 static String mapSearchEngineToCvParam_(
const String& openms_search_engine_name);
150 static String mapSearchEngineScoreToCvParam_(
const String& openms_search_engine_name,
double score,
String score_type);
152 static String extractNumPeptides_(
const String& common_identifier,
const String& protein_accession,
156 static String extractNumPeptidesDistinct_(
String common_identifier,
String protein_accession,
160 static String extractNumPeptidesUnambiguous_(
String common_identifier,
String protein_accession,
163 static std::map<String, Size> extractNumberOfSubSamples_(
const std::map<
String, std::vector<ProteinIdentification> >& map_run_to_proids);
165 static void writePeptideHeader_(
SVOutStream& output, std::map<String, Size> n_sub_samples);
167 static void writeProteinHeader_(
SVOutStream& output, std::map<String, Size> n_sub_samples);
175 const std::map<String, Size>& map_run_to_num_sub
@ IDXML
OpenMS identification format (.idXML)
Definition: FileTypes.h:66
Int getCharge() const
returns the charge of the peptide
void setSeparator(char sep)
MzTabDoubleList retention_time
Definition: MzTab.h:619
Base class for TOPP applications.
Definition: TOPPBase.h:150
bool store_psm_reliability_
Definition: MzTabFile.h:88
MzTabString sequence
Definition: MzTab.h:635
const String getUniModAccession() const
returns the unimod accession if available
File adapter for MzTab files.
Definition: MzTabFile.h:58
const ResidueModification & getModification(Size index) const
Returns the modification with the given index.
void fromCellString(const String &s) override
A method or algorithm argument contains illegal values.
Definition: Exception.h:648
Representation of a modification.
Definition: ResidueModification.h:76
MzTabString accession
Definition: MzTab.h:581
CoordinateType minX() const
Accessor for min_ coordinate minimum.
Definition: DIntervalBase.h:240
const AASequence & getSequence() const
returns the peptide sequence without leading or trailing spaces
std::vector< MzTabPSMSectionRow > MzTabPSMSectionRows
Definition: MzTab.h:690
void setName(const String &name)
void setModificationIdentifier(const MzTabString &mod_id)
@ ANYWHERE
Definition: ResidueModification.h:97
String & substitute(char from, char to)
Replaces all occurrences of the character from by the character to.
MzTabDoubleList retention_time
Definition: MzTab.h:645
std::vector< MzTabProteinSectionRow > MzTabProteinSectionRows
Definition: MzTab.h:688
std::vector< MzTabOptionalColumnEntry > opt_
Definition: MzTab.h:629
double probability
Probability of this group.
Definition: ProteinIdentification.h:74
A more convenient string class.
Definition: String.h:57
bool store_protein_uri_
Definition: MzTabFile.h:90
static ModificationsDB * getInstance(OpenMS::String unimod_file="CHEMISTRY/unimod.xml", OpenMS::String psimod_file="CHEMISTRY/PSI-MOD.obo", OpenMS::String xlmod_file="CHEMISTRY/XLMOD.obo")
Returns a pointer to the modifications DB (singleton)
Definition: ModificationsDB.h:77
const String & getId() const
returns the identifier of the modification
MzTabDoubleList retention_time_window
Definition: MzTab.h:620
@ CONSENSUSXML
OpenMS consensus map format (.consensusXML)
Definition: FileTypes.h:67
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
double getMonoWeight(Residue::ResidueType type=Residue::Full, Int charge=0) const
Representation of a protein hit.
Definition: ProteinHit.h:53
@ N_TERM
Definition: ResidueModification.h:99
double getCoverage() const
returns the coverage (in percent) of the protein hit based upon matched peptides
std::map< Size, MzTabDouble > best_search_engine_score
Definition: MzTab.h:588
@ Full
with N-terminus and C-terminus
Definition: Residue.h:152
MzTabString sequence
Definition: MzTab.h:609
MzTabString description
Definition: MzTab.h:582
void getPrimaryMSRunPath(StringList &toFill) const
returns the MS run path (stored in ColumnHeaders)
std::set< FeatureHandle, FeatureHandle::IndexLess > HandleSetType
Type definitions.
Definition: ConsensusFeature.h:75
#define LOG_INFO
Macro to use when information, e.g. a status, should be reported.
Definition: LogStream.h:454
Data model of MzTab files. Please see the official MzTab specification at https://code....
Definition: MzTab.h:699
MzTabModificationList modifications
Definition: MzTab.h:618
std::vector< MzTabOptionalColumnEntry > opt_
Definition: MzTab.h:655
IntensityType getIntensity() const
Definition: Peak2D.h:166
void getPrimaryMSRunPath(StringList &toFill) const
get the file path to the first MS run
database which holds all residue modifications from UniMod
Definition: ModificationsDB.h:72
String toString() const
returns the peptide as string with modifications embedded in brackets
@ MZIDENTML
mzIdentML (HUPO PSI AnalysisXML followup format) (.mzid)
Definition: FileTypes.h:77
MzTabString database
Definition: MzTab.h:639
String toUnmodifiedString() const
returns the peptide as string without any modifications
void set(const std::vector< MzTabDouble > &entries)
std::map< Size, MzTabDouble > peptide_abundance_study_variable
Definition: MzTab.h:626
MzTabString end
Definition: MzTab.h:654
@ PROTEIN_C_TERM
Definition: ResidueModification.h:100
Representation of a protein identification run.
Definition: ProteinIdentification.h:61
MzTabString database_version
Definition: MzTab.h:586
const std::vector< PeptideIdentification > & getPeptideIdentifications() const
returns a const reference to the PeptideIdentification vector
bool store_peptide_uri_
Definition: MzTabFile.h:91
void setMetaData(const MzTabMetaData &md)
std::map< Size, MzTabDouble > peptide_abundance_stdev_study_variable
Definition: MzTab.h:627
void setHits(const std::vector< PeptideHit > &hits)
Sets the peptide hits.
MzTabModificationList modifications
Definition: MzTab.h:644
void setSpecRef(String spec_ref)
const std::vector< PeptideHit > & getHits() const
returns the peptide hits as const
void load(const String &filename, std::vector< ProteinIdentification > &protein_ids, std::vector< PeptideIdentification > &peptide_ids)
Loads the identifications of an idXML file without identifier.
MzTabString post
Definition: MzTab.h:652
void fromCellString(const String &s) override
MzTabBoolean unique
Definition: MzTab.h:611
bool store_psm_uri_
Definition: MzTabFile.h:92
File adapter for MzIdentML files.
Definition: MzIdentMLFile.h:67
void assignRanks()
Sorts the hits by score and assigns ranks according to the scores.
std::vector< MzTabOptionalColumnEntry > opt_
Definition: MzTab.h:603
const String & getAccession() const
returns the accession of the protein
static const char N_TERMINAL_AA
Definition: PeptideEvidence.h:60
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
void set(const std::vector< MzTabString > &entries)
#define LOG_DEBUG
Macro for general debugging information.
Definition: LogStream.h:458
void set(const std::vector< MzTabModification > &entries)
void setAccession(const String &accession)
void setMSFile(Size index)
static const int UNKNOWN_POSITION
Definition: PeptideEvidence.h:53
std::vector< String > variable_modifications
Allowed variable modifications.
Definition: ProteinIdentification.h:112
@ FEATUREXML
OpenMS feature file (.featureXML)
Definition: FileTypes.h:65
std::vector< MzTabPeptideSectionRow > MzTabPeptideSectionRows
Definition: MzTab.h:689
char getOrigin() const
Returns the origin (i.e. modified amino acid)
#define LOG_WARN
Macro to use when a warning, i.e. a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:450
@ C_TERM
Definition: ResidueModification.h:98
Stream class for writing to comma/tab/...-separated values files.
Definition: SVOutStream.h:54
MzTabDouble protein_coverage
Definition: MzTab.h:598
const std::vector< ProteinIdentification > & getProteinIdentifications() const
non-mutable access to the protein identifications
String db
The used database.
Definition: ProteinIdentification.h:106
Facilitates file handling by file type recognition.
Definition: FileHandler.h:62
static const char UNKNOWN_AA
Definition: PeptideEvidence.h:57
void load(const String &filename, FeatureMap &feature_map)
loads the file with name filename into map and calls updateRanges().
@ PROTEIN_N_TERM
Definition: ResidueModification.h:101
Type
Actual file types enum.
Definition: FileTypes.h:58
const ChargeType & getCharge() const
Non-mutable access to charge state.
MzTabDouble mass_to_charge
Definition: MzTab.h:622
A consensus feature spanning multiple LC-MS/MS experiments.
Definition: ConsensusFeature.h:69
MzTabString pre
Definition: MzTab.h:651
float getScore() const
returns the score of the protein hit
Size size() const
returns the number of residues
void store(const String &filename, const MzTab &mz_tab) const
static FileTypes::Type getType(const String &filename)
Tries to determine the file type (by name or content)
String db_version
The database version.
Definition: ProteinIdentification.h:107
bool store_peptide_reliability_
Definition: MzTabFile.h:87
MzTabString accession
Definition: MzTab.h:637
void setProteinSectionRows(const MzTabProteinSectionRows &psd)
bool isModified() const
returns true if any of the residues or termini are modified
std::set< String > extractProteinAccessionsSet() const
extracts the set of non-empty protein accessions from peptide evidences
MzTabInteger charge
Definition: MzTab.h:646
bool find(TFinder &finder, const Pattern< TNeedle, FuzzyAC > &me, PatternAuxData< TNeedle > &dh)
Definition: AhoCorasickAmbiguous.h:884
String & toUpper()
Converts the string to uppercase.
std::map< Size, std::map< Size, MzTabDouble > > search_engine_score_ms_run
Definition: MzTab.h:616
A container for consensus elements.
Definition: ConsensusMap.h:75
const std::vector< PeptideIdentification > & getUnassignedPeptideIdentifications() const
non-mutable access to the unassigned peptide identifications
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:73
CoordinateType getRT() const
Returns the RT coordinate (index 0)
Definition: Peak2D.h:208
void setPSMSectionRows(const MzTabPSMSectionRows &psd)
std::vector< String > accessions
Accessions of (indistinguishable) proteins that belong to the same group.
Definition: ProteinIdentification.h:77
MzTabStringList ambiguity_members
Definition: MzTab.h:594
std::vector< String > fixed_modifications
Used fixed modifications.
Definition: ProteinIdentification.h:111
std::map< Size, MzTabDouble > best_search_engine_score
Definition: MzTab.h:615
void load(const String &filename, std::vector< ProteinIdentification > &poid, std::vector< PeptideIdentification > &peid)
Loads the identifications from a MzIdentML file.
void setProteinIdentifications(const std::vector< ProteinIdentification > &protein_identifications)
sets the protein identifications
int main(int argc, const char **argv)
Definition: INIFileEditor.cpp:73
std::map< Size, MzTabDouble > peptide_abundance_std_error_study_variable
Definition: MzTab.h:628
bool hasCTerminalModification() const
predicate which is true if the peptide is C-term modified
const ColumnHeaders & getColumnHeaders() const
Non-mutable access to the file descriptions.
const std::vector< PeptideEvidence > & getPeptideEvidences() const
returns information on peptides (potentially) identified by this PSM
MzTabString accession
Definition: MzTab.h:610
MzTabBoolean unique
Definition: MzTab.h:638
A container for features.
Definition: FeatureMap.h:93
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:62
ConvexHull2D & getConvexHull() const
Returns the overall convex hull of the feature (calculated from the convex hulls of the mass traces)
An LC-MS feature.
Definition: Feature.h:70
void setPositionsAndParameters(const std::vector< std::pair< Size, MzTabParameter > > &ppp)
double getScore() const
returns the PSM score
MzTabDouble calc_mass_to_charge
Definition: MzTab.h:648
String getDescription() const
returns the description of the protein
std::map< Size, MzTabDouble > search_engine_score
Definition: MzTab.h:642
This class provides Input/Output functionality for feature maps.
Definition: FeatureXMLFile.h:68
MzTabInteger PSM_ID
Definition: MzTab.h:636
MzTabParameterList search_engine
Definition: MzTab.h:641
static const char C_TERMINAL_AA
Definition: PeptideEvidence.h:61
String toString() const
Conversion to String.
DBoundingBox< 2 > getBoundingBox() const
returns the bounding box of the feature hull points
bool store_protein_goterms_
Definition: MzTabFile.h:94
MzTabString start
Definition: MzTab.h:653
CoordinateType maxX() const
Accessor for max_ coordinate maximum.
Definition: DIntervalBase.h:252
bool store_smallmolecule_reliability_
Definition: MzTabFile.h:89
MzTabString database
Definition: MzTab.h:585
MzTabInteger charge
Definition: MzTab.h:621
Representation of a peptide/protein sequence.
Definition: AASequence.h:107
const char * what() const noexcept override
Returns the error message of the exception.
bool hasNTerminalModification() const
predicate which is true if the peptide is N-term modified
CoordinateType getMZ() const
Returns the m/z coordinate (index 1)
Definition: Peak2D.h:196
Bundles multiple (e.g. indistinguishable) proteins in a group.
Definition: ProteinIdentification.h:71
Search parameters of the DB search.
Definition: ProteinIdentification.h:103
MzTabString database_version
Definition: MzTab.h:640
const ResidueModification * getCTerminalModification() const
returns a pointer to the C-terminal modification, or zero if none is set
std::pair< String, MzTabString > MzTabOptionalColumnEntry
Definition: MzTab.h:575
TermSpecificity getTermSpecificity() const
returns terminal specificity
std::map< std::pair< String, String >, std::vector< PeptideHit > > MapAccPepType
Definition: MzTabFile.h:66
const ResidueModification * getNTerminalModification() const
returns a pointer to the N-terminal modification, or zero if none is set
MzTabSpectraRef spectra_ref
Definition: MzTab.h:650
bool store_smallmolecule_uri_
Definition: MzTabFile.h:93
void setCVLabel(const String &CV_label)
const std::vector< ProteinIdentification > & getProteinIdentifications() const
non-mutable access to the protein identifications
This class provides Input functionality for ConsensusMaps and Output functionality for alignments and...
Definition: ConsensusXMLFile.h:61
MzTabDouble exp_mass_to_charge
Definition: MzTab.h:647
Used to load and store idXML files.
Definition: IdXMLFile.h:63
void setPeptideSectionRows(const MzTabPeptideSectionRows &psd)
bool store_protein_reliability_
Definition: MzTabFile.h:86
std::vector< MzTabSmallMoleculeSectionRow > MzTabSmallMoleculeSectionRows
Definition: MzTab.h:691
Representation of a peptide hit.
Definition: PeptideHit.h:54