 |
OpenMS
2.4.0
|
Go to the documentation of this file.
99 void setup(
PredictorMap& predictors,
const std::map<Size, Int>& labels);
110 void predict(std::vector<Prediction>& predictions,
111 std::vector<Size> indexes = std::vector<Size>())
const;
121 void getFeatureWeights(std::map<String, double>& feature_weights)
const;
124 void writeXvalResults(
const String& path)
const;
131 std::vector<std::vector<struct svm_node> >
nodes_;
134 struct svm_problem data_;
137 struct svm_parameter svm_params_;
158 void scaleData_(PredictorMap& predictors)
const;
161 void convertData_(
const PredictorMap& predictors);
164 std::pair<double, double> chooseBestParameters_()
const;
167 void optimizeParameters_();
std::vector< std::vector< double > > SVMPerformance
Classification performance for different param. combinations (C/gamma):
Definition: SimpleSVM.h:128
Protease digestion_enzyme
The cleavage site information in detail (from ProteaseDB)
Definition: ProteinIdentification.h:118
void setProgress(SignedSize value) const
Sets the current progress.
Base class for TOPP applications.
Definition: TOPPBase.h:150
Normalizes the peak intensities spectrum-wise.
Definition: Normalizer.h:57
@ PEPTIDE_IDS_EMPTY
Definition: PeptideIndexing.h:133
void setEnzyme(const String &name)
Sets the enzyme for the digestion (by name)
SVMPerformance performance_
Cross-validation results.
Definition: SimpleSVM.h:152
std::map< Int, double > probabilities
Predicted probabilities for different classes.
Definition: SimpleSVM.h:80
static ProteaseDB * getInstance()
this member function serves as a replacement of the constructor
Definition: DigestionEnzymeDB.h:70
PeakMassType
Peak mass type.
Definition: ProteinIdentification.h:93
void sortSpectra(bool sort_mz=true)
Sorts the data points by retention time.
This class serves for reading in and writing FASTA files.
Definition: FASTAFile.h:64
WindowMower augments the highest peaks in a sliding or jumping window.
Definition: WindowMower.h:54
Simple interface to support vector machines for classification (via LIBSVM).
Definition: SimpleSVM.h:65
#define NUMBER_OF_THREADS
Definition: SimpleSearchEngine.cpp:74
void setValue(const String &key, const DataValue &value, const String &description="", const StringList &tags=StringList())
Sets a value.
UInt missed_cleavages
The number of allowed missed cleavages.
Definition: ProteinIdentification.h:113
File adapter for MzML files.
Definition: MzMLFile.h:55
A more convenient string class.
Definition: String.h:57
Iterator begin()
Definition: MSExperiment.h:157
static ModificationsDB * getInstance(OpenMS::String unimod_file="CHEMISTRY/unimod.xml", OpenMS::String psimod_file="CHEMISTRY/PSI-MOD.obo", OpenMS::String xlmod_file="CHEMISTRY/XLMOD.obo")
Returns a pointer to the modifications DB (singleton)
Definition: ModificationsDB.h:77
void filterPeakSpectrum(PeakSpectrum &spectrum)
void setRT(double rt)
sets the RT of the MS2 spectrum where the identification occurred
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:77
Definition: SimpleSearchEngine.cpp:81
void getAllNames(std::vector< String > &all_names) const
returns all the enzyme names (does NOT include synonym names)
Definition: DigestionEnzymeDB.h:122
void addMSLevel(int level)
adds a desired MS level for peaks to load
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
double getMonoWeight(Residue::ResidueType type=Residue::Full, Int charge=0) const
SignedSize peptide_mod_index
Definition: SimpleSearchEngine.cpp:88
#define LOG_INFO
Macro to be used if information, e.g. a status, should be reported.
Definition: LogStream.h:454
@ MONOISOTOPIC
Definition: ProteinIdentification.h:95
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
ExitCodes run(std::vector< FASTAFile::FASTAEntry > &proteins, std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids)
forward for old interface and pyOpenMS; use run<T>() for more control
Definition: PeptideIndexing.h:147
virtual void getSpectrum(PeakSpectrum &spec, const AASequence &peptide, Int min_charge, Int max_charge) const
returns a spectrum with the ion types that are set in the tool parameters
std::vector< Int > IntList
Vector of signed integers.
Definition: ListUtils.h:58
Int label
Predicted class label.
Definition: SimpleSVM.h:77
const double PROTON_MASS_U
void endProgress() const
Ends the progress display.
@ UNEXPECTED_RESULT
Definition: PeptideIndexing.h:135
void setMissedCleavages(Size missed_cleavages)
Sets the number of missed cleavages for the digestion (default is 0). This setting is ignored when lo...
Size size() const
Definition: MSExperiment.h:127
NLargest removes all but the n largest peaks.
Definition: NLargest.h:54
void setHits(const std::vector< PeptideHit > &hits)
Sets the peptide hits.
double score
Definition: SimpleSearchEngine.cpp:89
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:91
bool fragment_mass_tolerance_ppm
Mass tolerance unit of fragment ions (true: ppm, false: Dalton)
Definition: ProteinIdentification.h:115
static DateTime now()
Returns the current date and time.
String charges
The allowed charges for the search.
Definition: ProteinIdentification.h:109
StringView sequence
Definition: SimpleSearchEngine.cpp:87
int Int
Signed integer type.
Definition: Types.h:102
const DigestionEnzymeType * getEnzyme(const String &name) const
Definition: DigestionEnzymeDB.h:99
void assignRanks()
Sorts the hits by score and assigns ranks according to the scores.
std::vector< double > log2_gamma_
Definition: SimpleSVM.h:149
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
Size n_parts_
Number of partitions for cross-validation.
Definition: SimpleSVM.h:146
void registerOptionsAndFlags_() override
Sets the valid command line options (with argument) and flags (without argument).
Definition: SimpleSearchEngine.cpp:107
void filterPeakMap(PeakMap &exp) const
std::vector< String > variable_modifications
Allowed variable modifications.
Definition: ProteinIdentification.h:112
PeakMassType mass_type
Mass type of the peaks.
Definition: ProteinIdentification.h:110
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:54
std::vector< std::vector< struct svm_node > > nodes_
Values of predictors (LIBSVM format)
Definition: SimpleSVM.h:131
void load(const String &filename, PeakMap &map)
Loads a map from a MzML file. Spectra and chromatograms are sorted by default (this can be disabled u...
static void applyVariableModifications(const std::vector< ResidueModification >::const_iterator &var_mods_begin, const std::vector< ResidueModification >::const_iterator &var_mods_end, const AASequence &peptide, Size max_variable_mods_per_peptide, std::vector< AASequence > &all_modified_peptides, bool keep_original=true)
String db
The used database.
Definition: ProteinIdentification.h:106
StringView provides a non-owning view on an existing string.
Definition: String.h:480
void setHigherScoreBetter(bool value)
sets the peptide score orientation
void setParameters(const Param &param)
Sets the parameters.
vector< ResidueModification > getModifications_(StringList modNames)
Definition: SimpleSearchEngine.cpp:169
static void applyFixedModifications(const std::vector< ResidueModification >::const_iterator &fixed_mods_begin, const std::vector< ResidueModification >::const_iterator &fixed_mods_end, AASequence &peptide)
void setCharge(Int charge)
sets the charge of the peptide
static String getVersion()
Return the version number of OpenMS.
const Param & getParameters() const
Non-mutable access to the parameters.
Slimmer structure as storing all scored candidates in PeptideHit objects takes too much space.
Definition: SimpleSearchEngine.cpp:85
double fragment_mass_tolerance
Mass tolerance of fragment ions (Dalton or ppm)
Definition: ProteinIdentification.h:114
static void deisotopeAndSingleCharge(MSSpectrum &spectra, double fragment_tolerance, bool fragment_unit_ppm, int min_charge=1, int max_charge=3, bool keep_only_deisotoped=false, unsigned int min_isopeaks=3, unsigned int max_isopeaks=10, bool make_single_charged=true, bool annotate_charge=false)
ExitCodes
Exit codes.
Definition: PeptideIndexing.h:129
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:73
void sortByPosition()
Lexicographically sorts the peaks by their position.
void setSequence(const AASequence &sequence)
sets the peptide sequence
std::vector< String > fixed_modifications
Used fixed modifications.
Definition: ProteinIdentification.h:111
static bool hasBetterScore(const AnnotatedHit &a, const AnnotatedHit &b)
Definition: SimpleSearchEngine.cpp:92
double precursor_mass_tolerance
Mass tolerance of precursor ions (Dalton or ppm)
Definition: ProteinIdentification.h:116
void store(String filename, const std::vector< ProteinIdentification > &protein_ids, const std::vector< PeptideIdentification > &peptide_ids, const String &document_id="")
Stores the data in an idXML file.
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:134
Generates theoretical spectra with various options.
Definition: TheoreticalSpectrumGenerator.h:63
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:62
Size digestUnmodified(const StringView &sequence, std::vector< StringView > &output, Size min_length=1, Size max_length=0) const
Performs the enzymatic digestion of an unmodified sequence.
static void printNull_(const char *)
Dummy function to suppress LIBSVM output.
Definition: SimpleSVM.h:155
static void load(const String &filename, std::vector< FASTAEntry > &data)
loads a FASTA file given by 'filename' and stores the information in 'data'
void filterPeakMap(PeakMap &exp)
Class for the enzymatic digestion of proteins.
Definition: ProteaseDigestion.h:60
Iterator end()
Definition: MSExperiment.h:167
void postProcessHits_(const PeakMap &exp, vector< vector< AnnotatedHit > > &annotated_hits, vector< ProteinIdentification > &protein_ids, vector< PeptideIdentification > &peptide_ids, Size top_hits, const vector< ResidueModification > &fixed_modifications, const vector< ResidueModification > &variable_modifications, Size max_variable_mods_per_peptide)
Definition: SimpleSearchEngine.cpp:231
@ DATABASE_EMPTY
Definition: PeptideIndexing.h:132
Refreshes the protein references for all peptide hits in a vector of PeptideIdentifications and adds ...
Definition: PeptideIndexing.h:123
struct svm_model * model_
Pointer to SVM model (LIBSVM format)
Definition: SimpleSVM.h:140
Options for loading files containing peak data.
Definition: PeakFileOptions.h:47
void getPrimaryMSRunPath(StringList &toFill) const
get the file path to the first MS run
ThresholdMower removes all peaks below a threshold.
Definition: ThresholdMower.h:51
ExitCodes main_(int, const char **) override
The actual "main" method. main_() is invoked by main().
Definition: SimpleSearchEngine.cpp:325
ExitCodes
Exit codes.
Definition: TOPPBase.h:155
std::vector< PeptideHit::PeakAnnotation > fragment_annotations
Definition: SimpleSearchEngine.cpp:90
void getAllSearchModifications(std::vector< String > &modifications) const
Collects all modifications that can be used for identification searches.
void clearMSLevels()
clears the MS levels
SimpleSearchEngine()
Definition: SimpleSearchEngine.cpp:99
bool precursor_mass_tolerance_ppm
Mass tolerance unit of precursor ions (true: ppm, false: Dalton)
Definition: ProteinIdentification.h:117
Management and storage of parameters / INI files.
Definition: Param.h:74
std::map< String, std::vector< double > > PredictorMap
Mapping from predictor name to vector of predictor values.
Definition: SimpleSVM.h:71
SVM prediction result.
Definition: SimpleSVM.h:74
Representation of a peptide/protein sequence.
Definition: AASequence.h:107
std::vector< SpectrumType >::const_iterator ConstIterator
Non-mutable iterator.
Definition: MSExperiment.h:113
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
std::vector< String > predictor_names_
Names of predictors in the model (excluding uninformative ones)
Definition: SimpleSVM.h:143
const double C13C12_MASSDIFF_U
@ EXECUTION_OK
Definition: PeptideIndexing.h:131
The representation of a 1D spectrum.
Definition: MSSpectrum.h:66
Search parameters of the DB search.
Definition: ProteinIdentification.h:103
void setMZ(double mz)
sets the MZ of the MS2 spectrum
void preprocessSpectra_(PeakMap &exp, double fragment_mass_tolerance, bool fragment_mass_tolerance_unit_ppm)
Definition: SimpleSearchEngine.cpp:183
ExitCodes main(int argc, const char **argv)
Main routine of all TOPP applications.
void filterPeakSpectrum(PeakSpectrum &spectrum)
int main(int argc, const char **argv)
Definition: SimpleSearchEngine.cpp:655
static double compute(double fragment_mass_tolerance, bool fragment_mass_tolerance_unit_ppm, const PeakSpectrum &exp_spectrum, const PeakSpectrum &theo_spectrum)
PeakFileOptions & getOptions()
Mutable access to the options for loading/storing.
void setLogType(LogType type) const
Sets the progress log that should be used. The default type is NONE!
void setScoreType(const String &type)
sets the peptide score type
Used to load and store idXML files.
Definition: IdXMLFile.h:63
void setScore(double score)
sets the PSM score
Representation of a peptide hit.
Definition: PeptideHit.h:54