vg
tools for working with variation graphs
|
#include <multipath_mapper.hpp>
Public Types | |
using | memcluster_t = pair< vector< pair< const MaximalExactMatch *, pos_t > >, double > |
We often pass around clusters of MEMs and their graph positions, paired with a multiplicity. More... | |
using | clustergraph_t = tuple< bdsg::HashGraph *, memcluster_t, size_t > |
using | match_fanouts_t = unordered_map< const MaximalExactMatch *, deque< pair< string::const_iterator, char > >> |
Public Member Functions | |
MultipathMapper (PathPositionHandleGraph *graph, gcsa::GCSA *gcsa_index, gcsa::LCPArray *lcp_array, haplo::ScoreProvider *haplo_score_provider=nullptr, SnarlManager *snarl_manager=nullptr, MinimumDistanceIndex *distance_index=nullptr) | |
~MultipathMapper () | |
void | multipath_map (const Alignment &alignment, vector< multipath_alignment_t > &multipath_alns_out) |
Map read in alignment to graph and make multipath alignments. More... | |
void | multipath_map_paired (const Alignment &alignment1, const Alignment &alignment2, vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs_out, vector< pair< Alignment, Alignment >> &ambiguous_pair_buffer) |
void | reduce_to_single_path (const multipath_alignment_t &multipath_aln, vector< Alignment > &alns_out, size_t max_number) const |
void | set_automatic_min_clustering_length (double random_mem_probability=0.5) |
void | calibrate_mismapping_detection (size_t num_simulations, const vector< size_t > &simulated_read_lengths) |
void | init_band_padding_memo () |
Should be called once after construction, or any time the band padding multiplier is changed. More... | |
void | set_alignment_scores (int8_t match, int8_t mismatch, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus) |
Set all the aligner scoring parameters and create the stored aligner instances. More... | |
void | set_alignment_scores (std::istream &matrix_stream, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus) |
void | set_alignment_scores (const int8_t *score_matrix, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus) |
![]() | |
BaseMapper (PathPositionHandleGraph *xidex, gcsa::GCSA *g, gcsa::LCPArray *a, haplo::ScoreProvider *haplo_score_provider=nullptr) | |
BaseMapper (void) | |
int | random_match_length (double chance_random) |
void | set_alignment_scores (int8_t match, int8_t mismatch, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus, double haplotype_consistency_exponent=1) |
Override alignment score setting to support haplotype consistency exponent. More... | |
void | set_alignment_scores (istream &matrix_stream, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus, double haplotype_consistency_exponent=1) |
Same, but loading a 4x4 substitution score matrix from a stream. More... | |
void | set_cache_size (int new_cache_size) |
vector< MaximalExactMatch > | find_mems_deep (string::const_iterator seq_begin, string::const_iterator seq_end, double &lcp_avg, double &fraction_filtered, int max_mem_length=0, int min_mem_length=1, int reseed_length=0, bool use_lcp_reseed_heuristic=false, bool use_diff_based_fast_reseed=false, bool include_parent_in_sub_mem_count=false, bool record_max_lcp=false, int reseed_below_count=0) |
vector< MaximalExactMatch > | find_mems_simple (string::const_iterator seq_begin, string::const_iterator seq_end, int max_mem_length=0, int min_mem_length=1, int reseed_length=0) |
vector< MaximalExactMatch > | find_stripped_matches (string::const_iterator seq_begin, string::const_iterator seq_end, size_t strip_length, size_t max_match_length, size_t target_count) |
vector< MaximalExactMatch > | find_fanout_mems (string::const_iterator seq_begin, string::const_iterator seq_end, string::const_iterator qual_begin, int max_fans_out, char max_fanout_base_quality, vector< deque< pair< string::const_iterator, char >>> *mem_fanout_breaks=nullptr) |
vector< pos_t > | walk_fanout_path (string::const_iterator begin, string::const_iterator end, const deque< pair< string::const_iterator, char >> &fanout_breaks, gcsa::node_type pos) |
void | rescue_high_count_order_length_mems (vector< MaximalExactMatch > &mems, size_t max_rescue_hit_count) |
void | precollapse_order_length_runs (string::const_iterator seq_begin, vector< MaximalExactMatch > &mems) |
void | prefilter_redundant_sub_mems (vector< MaximalExactMatch > &mems, vector< pair< int, vector< size_t >>> &sub_mem_containment_graph) |
void | find_sub_mems (const vector< MaximalExactMatch > &mems, int parent_layer_begin, int parent_layer_end, int mem_idx, string::const_iterator next_mem_end, int min_mem_length, vector< pair< MaximalExactMatch, vector< size_t >>> &sub_mems_out) |
void | find_sub_mems_fast (const vector< MaximalExactMatch > &mems, int parent_layer_begin, int parent_layer_end, int mem_idx, string::const_iterator leftmost_guaranteed_disjoint_bound, string::const_iterator leftmost_seeding_bound, int min_sub_mem_length, vector< pair< MaximalExactMatch, vector< size_t >>> &sub_mems_out) |
set< pos_t > | sequence_positions (const string &seq) |
size_t | get_adaptive_min_reseed_length (size_t parent_mem_length) |
void | apply_haplotype_consistency_scores (const vector< Alignment * > &alns) |
![]() | |
void | set_alignment_scores (int8_t match, int8_t mismatch, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus) |
Set all the aligner scoring parameters and create the stored aligner instances. More... | |
void | set_alignment_scores (std::istream &matrix_stream, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus) |
void | set_alignment_scores (const int8_t *score_matrix, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus) |
![]() | |
void | set_fragment_length_distr_params (size_t maximum_sample_size=1000, size_t reestimation_frequency=1000, double robust_estimation_fraction=0.95) |
bool | has_fixed_fragment_length_distr () |
Returns true if fragment length distribution has been fixed. More... | |
void | force_fragment_length_distr (double mean, double stddev) |
Protected Member Functions | |
void | multipath_map_internal (const Alignment &alignment, MappingQualityMethod mapq_method, vector< multipath_alignment_t > &multipath_alns_out) |
void | attempt_unpaired_multipath_map_of_pair (const Alignment &alignment1, const Alignment &alignment2, vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs_out, vector< pair< Alignment, Alignment >> &ambiguous_pair_buffer) |
bool | attempt_rescue (const multipath_alignment_t &multipath_aln, const Alignment &other_aln, bool rescue_forward, multipath_alignment_t &rescue_multipath_aln) |
void | extract_rescue_graph (const multipath_alignment_t &multipath_aln, const Alignment &other_aln, bool rescue_forward, MutableHandleGraph *rescue_graph) const |
Use the algorithm implied by the mapper settings to extract a subgraph to perform a rescue alignment against. More... | |
void | align_to_cluster_graphs (const Alignment &alignment, MappingQualityMethod mapq_method, vector< clustergraph_t > &cluster_graphs, vector< multipath_alignment_t > &multipath_alns_out, vector< double > &multiplicities_out, size_t num_mapping_attempts, const match_fanouts_t *fanouts=nullptr, vector< size_t > *cluster_idxs=nullptr) |
void | align_to_cluster_graph_pairs (const Alignment &alignment1, const Alignment &alignment2, vector< clustergraph_t > &cluster_graphs1, vector< clustergraph_t > &cluster_graphs2, vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs_out, vector< pair< pair< size_t, size_t >, int64_t >> &cluster_pairs, vector< double > &pair_multiplicities, vector< pair< size_t, size_t >> &duplicate_pairs_out, const match_fanouts_t *fanouts1, const match_fanouts_t *fanouts2) |
bool | align_to_cluster_graphs_with_rescue (const Alignment &alignment1, const Alignment &alignment2, vector< clustergraph_t > &cluster_graphs1, vector< clustergraph_t > &cluster_graphs2, vector< MaximalExactMatch > &mems1, vector< MaximalExactMatch > &mems2, vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs_out, vector< pair< pair< size_t, size_t >, int64_t >> &pair_distances_out, vector< double > &pair_multiplicities_out, const match_fanouts_t *fanouts1, const match_fanouts_t *fanouts2) |
void | attempt_rescue_for_secondaries (const Alignment &alignment1, const Alignment &alignment2, vector< clustergraph_t > &cluster_graphs1, vector< clustergraph_t > &cluster_graphs2, vector< pair< size_t, size_t >> &duplicate_pairs, vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs_out, vector< pair< pair< size_t, size_t >, int64_t >> &cluster_pairs, vector< double > &pair_multiplicities, const match_fanouts_t *fanouts1, const match_fanouts_t *fanouts2) |
void | merge_rescued_mappings (vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs_out, vector< pair< pair< size_t, size_t >, int64_t >> &cluster_pairs, vector< double > &pair_multiplicities, vector< pair< multipath_alignment_t, multipath_alignment_t >> &rescued_multipath_aln_pairs, vector< pair< pair< size_t, size_t >, int64_t >> &rescued_cluster_pairs, vector< double > &rescued_multiplicities) const |
Merge the rescued mappings into the output vector and deduplicate pairs. More... | |
vector< memcluster_t > | get_clusters (const Alignment &alignment, const vector< MaximalExactMatch > &mems, OrientedDistanceMeasurer *distance_measurer=nullptr, const match_fanouts_t *fanouts=nullptr) const |
vector< pair< pair< size_t, size_t >, int64_t > > | get_cluster_pairs (const Alignment &alignment1, const Alignment &alignment2, vector< clustergraph_t > &cluster_graphs1, vector< clustergraph_t > &cluster_graphs2, OrientedDistanceMeasurer *distance_measurer=nullptr) |
vector< clustergraph_t > | query_cluster_graphs (const Alignment &alignment, const vector< MaximalExactMatch > &mems, const vector< memcluster_t > &clusters) |
pair< bdsg::HashGraph *, bool > | extract_cluster_graph (const Alignment &alignment, const memcluster_t &mem_cluster) |
pair< bdsg::HashGraph *, bool > | extract_maximal_graph (const Alignment &alignment, const memcluster_t &mem_cluster) |
pair< bdsg::HashGraph *, bool > | extract_restrained_graph (const Alignment &alignment, const memcluster_t &mem_cluster) |
vector< pair< int64_t, int64_t > > | covered_intervals (const Alignment &alignment, const clustergraph_t &cluster) const |
Returns the union of the intervals on the read that a cluster covers in sorted order. More... | |
void | split_multicomponent_alignments (vector< multipath_alignment_t > &multipath_alns_out, vector< size_t > *cluster_idxs=nullptr, vector< double > *multiplicities=nullptr) const |
void | split_multicomponent_alignments (vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs_out, vector< pair< pair< size_t, size_t >, int64_t >> &cluster_pairs, vector< double > &multiplicities) const |
void | agglomerate_alignments (vector< multipath_alignment_t > &multipath_alns_out, vector< double > *multiplicities=nullptr) const |
void | agglomerate_alignment_pairs (vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs_out, vector< pair< pair< size_t, size_t >, int64_t >> &cluster_pairs, vector< double > &multiplicities) const |
void | agglomerate (size_t idx, multipath_alignment_t &agglomerating, const multipath_alignment_t &multipath_aln, vector< size_t > &agglomerated_group, unordered_set< pos_t > &agg_start_positions, unordered_set< pos_t > &agg_end_positions) const |
The internal agglomeration procedure. More... | |
void | find_spliced_alignments (const Alignment &alignment, vector< multipath_alignment_t > &multipath_alns_out, vector< double > &multiplicities, vector< size_t > &cluster_idxs, const vector< MaximalExactMatch > &mems, vector< clustergraph_t > &cluster_graphs, const match_fanouts_t *fanouts=nullptr) |
void | find_spliced_alignments (const Alignment &alignment1, const Alignment &alignment2, vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs_out, vector< pair< pair< size_t, size_t >, int64_t >> &cluster_pairs, vector< double > &pair_multiplicities, const vector< MaximalExactMatch > &mems1, const vector< MaximalExactMatch > &mems2, vector< clustergraph_t > &cluster_graphs1, vector< clustergraph_t > &cluster_graphs2, const match_fanouts_t *fanouts=nullptr) |
void | identify_aligned_splice_candidates (const Alignment &alignment, bool search_left, const pair< int64_t, int64_t > &primary_interval, const vector< multipath_alignment_t > &multipath_alns, const vector< size_t > &cluster_idxs, unordered_set< size_t > &clusters_used_out, vector< size_t > &mp_aln_candidates_out) const |
void | identify_aligned_splice_candidates (const Alignment &alignment, bool read_1, bool search_left, const pair< int64_t, int64_t > &primary_interval, const vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs, const vector< pair< pair< size_t, size_t >, int64_t >> &cluster_pairs, unordered_set< size_t > &clusters_used_out, vector< size_t > &mp_aln_candidates_out) const |
void | identify_unaligned_splice_candidates (const Alignment &alignment, bool search_left, const pair< int64_t, int64_t > &primary_interval, const vector< MaximalExactMatch > &mems, const vector< clustergraph_t > &cluster_graphs, const unordered_set< size_t > &clusters_already_used, vector< size_t > &cluster_candidates_out, vector< pair< const MaximalExactMatch *, pos_t >> &hit_candidates_out) const |
void | align_to_splice_candidates (const Alignment &alignment, vector< clustergraph_t > &cluster_graphs, const vector< size_t > &cluster_candidates, const vector< pair< const MaximalExactMatch *, pos_t >> &hit_candidates, const pair< int64_t, int64_t > &primary_interval, bool searching_left, vector< multipath_alignment_t > &candidates_out, vector< double > &multiplicities_out, const match_fanouts_t *mem_fanouts=nullptr) const |
bool | test_splice_candidates (const Alignment &alignment, bool searching_left, multipath_alignment_t &anchor_mp_aln, double &anchor_multiplicity, int64_t num_candidates, const function< const multipath_alignment_t &(int64_t)> &get_candidate, const function< multipath_alignment_t &&(int64_t)> &consume_candidate) |
void | multipath_align (const Alignment &alignment, const bdsg::HashGraph *graph, memcluster_t &graph_mems, multipath_alignment_t &multipath_aln_out, const match_fanouts_t *fanouts) const |
void | make_nontrivial_multipath_alignment (const Alignment &alignment, const HandleGraph &subgraph, const function< pair< id_t, bool >(id_t)> &translator, multipath_alignment_t &multipath_aln_out) const |
void | strip_full_length_bonuses (multipath_alignment_t &multipath_aln) const |
Remove the full length bonus from all source or sink subpaths that received it. More... | |
vector< double > | mapping_likelihoods (vector< multipath_alignment_t > &multipath_alns) const |
Returns a vector of log-likelihoods for each mapping. More... | |
vector< double > | pair_mapping_likelihoods (vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs, const vector< pair< pair< size_t, size_t >, int64_t >> &cluster_pairs) const |
Returns a vector of log-likelihoods for each pair mapping. More... | |
int32_t | compute_raw_mapping_quality_from_scores (const vector< double > &scores, MappingQualityMethod mapq_method, bool have_qualities, const vector< double > *multiplicities=nullptr) const |
void | sort_and_compute_mapping_quality (vector< multipath_alignment_t > &multipath_alns, MappingQualityMethod mapq_method, vector< size_t > *cluster_idxs=nullptr, vector< double > *multiplicities=nullptr) const |
void | sort_and_compute_mapping_quality (vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs, vector< pair< pair< size_t, size_t >, int64_t >> &cluster_pairs, vector< pair< size_t, size_t >> *duplicate_pairs_out=nullptr, vector< double > *pair_multiplicities=nullptr) const |
double | estimate_missed_rescue_multiplicity (size_t which_pair, const vector< pair< pair< size_t, size_t >, int64_t >> &cluster_pairs, const vector< clustergraph_t > &cluster_graphs1, const vector< clustergraph_t > &cluster_graphs2, bool from_secondary_rescue) const |
double | cluster_multiplicity (const memcluster_t &cluster) const |
double | pair_cluster_multiplicity (const memcluster_t &cluster_1, const memcluster_t &cluster_2) const |
double | fragment_length_log_likelihood (int64_t length) const |
Computes the log-likelihood of a given fragment length in the trained distribution. More... | |
bool | likely_mismapping (const multipath_alignment_t &multipath_aln) |
Would an alignment this good be expected against a graph this big by chance alone. More... | |
bool | likely_misrescue (const multipath_alignment_t &multipath_aln) |
Would an alignment this good be expected against a graph this big by chance alone. More... | |
size_t | pseudo_length (const multipath_alignment_t &multipath_aln) const |
A scaling of a score so that it approximately follows the distribution of the longest match in p-value test. More... | |
double | random_match_p_value (size_t match_length, size_t read_length) |
The approximate p-value for a match length of the given size against the current graph. More... | |
match_fanouts_t | record_fanouts (const vector< MaximalExactMatch > &mems, vector< deque< pair< string::const_iterator, char >>> &fanouts) const |
Reorganizes the fan-out breaks into the format that MultipathAlignmentGraph wants it in. More... | |
int64_t | distance_between (const multipath_alignment_t &multipath_aln_1, const multipath_alignment_t &multipath_aln_2, bool full_fragment=false, bool forward_strand=false) const |
int64_t | distance (const pos_t &pos_1, const pos_t &pos_2) const |
bool | are_consistent (const multipath_alignment_t &multipath_aln_1, const multipath_alignment_t &multipath_aln_2) const |
Are two multipath alignments consistently placed based on the learned fragment length distribution? More... | |
bool | is_consistent (int64_t distance) const |
Is this a consistent inter-pair distance based on the learned fragment length distribution? More... | |
double | read_coverage_z_score (int64_t coverage, const Alignment &alignment) const |
Computes the Z-score of the number of matches against an equal length random DNA string. More... | |
bool | share_terminal_positions (const multipath_alignment_t &multipath_aln_1, const multipath_alignment_t &multipath_aln_2) const |
haploMath::RRMemo & | get_rr_memo (double recombination_penalty, size_t population_size) const |
Get a thread_local RRMemo with these parameters. More... | |
void | establish_strand_consistency (vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs, vector< pair< pair< size_t, size_t >, int64_t >> &cluster_pairs) |
int64_t | pessimistic_gap (int64_t length, double multiplier) const |
A restrained estimate of the amount of gap we would like to align for a read tail. More... | |
vector< MaximalExactMatch > | find_mems (const Alignment &alignment, vector< deque< pair< string::const_iterator, char >>> *mem_fanout_breaks=nullptr) |
![]() | |
AlignerClient (double gc_content_estimate=vg::default_gc_content) | |
const GSSWAligner * | get_aligner (bool have_qualities=true) const |
const QualAdjAligner * | get_qual_adj_aligner () const |
const Aligner * | get_regular_aligner () const |
Static Protected Member Functions | |
static int64_t | read_coverage (const memcluster_t &mem_hits) |
Computes the number of read bases a cluster of MEM hits covers. More... | |
Protected Attributes | |
DinucleotideMachine | dinuc_machine |
SpliceMotifs | splice_motifs |
SnarlManager * | snarl_manager |
MinimumDistanceIndex * | distance_index |
PathComponentIndex * | path_component_index = nullptr |
vector< size_t > | band_padding_memo |
![]() | |
FragmentLengthDistribution | fragment_length_distr |
Holds the actual fragment length distribution and estimation information. More... | |
Static Protected Attributes | |
static thread_local unordered_map< pair< double, size_t >, haploMath::RRMemo > | rr_memos |
Memos used by population model. More... | |
static thread_local unordered_map< pair< size_t, size_t >, double > | p_value_memo |
static thread_local unordered_map< double, vector< int64_t > > | pessimistic_gap_memo |
static const size_t | gap_memo_max_size = 1000 |
Additional Inherited Members | |
![]() | |
static double | estimate_gc_content (const gcsa::GCSA *gcsa) |
![]() | |
static int8_t * | parse_matrix (std::istream &matrix_stream) |
Allocates an array to hold a 4x4 substitution matrix and returns it. More... | |
![]() | |
static thread_local vector< size_t > | adaptive_reseed_length_memo |
using vg::MultipathMapper::clustergraph_t = tuple<bdsg::HashGraph*, memcluster_t, size_t> |
This represents a graph for a cluster, and holds a pointer to the actual extracted graph, a list of assigned MEMs, and the number of bases of read coverage that that MEM cluster provides (which serves as a priority).
using vg::MultipathMapper::match_fanouts_t = unordered_map<const MaximalExactMatch*, deque<pair<string::const_iterator, char> >> |
Represents the mismatches that were allowed in "MEMs" from the fanout match algorithm
using vg::MultipathMapper::memcluster_t = pair<vector<pair<const MaximalExactMatch*, pos_t> >, double> |
We often pass around clusters of MEMs and their graph positions, paired with a multiplicity.
vg::MultipathMapper::MultipathMapper | ( | PathPositionHandleGraph * | graph, |
gcsa::GCSA * | gcsa_index, | ||
gcsa::LCPArray * | lcp_array, | ||
haplo::ScoreProvider * | haplo_score_provider = nullptr , |
||
SnarlManager * | snarl_manager = nullptr , |
||
MinimumDistanceIndex * | distance_index = nullptr |
||
) |
vg::MultipathMapper::~MultipathMapper | ( | ) |
|
protected |
The internal agglomeration procedure.
|
protected |
Combine all of the significant alignments into one pair. Requires alignments to be sorted by significance already.
|
protected |
Combine all of the significant alignments into one. Requires alignments to be sorted by significance already.
|
protected |
After clustering MEMs, extracting graphs, assigning hits to cluster graphs, and determining which cluster graph pairs meet the fragment length distance constraints, perform multipath alignment. Produces topologically sorted multipath_alignment_ts.
|
protected |
After clustering MEMs, extracting graphs, and assigning hits to cluster graphs, perform multipath alignment. Produces topologically sorted multipath_alignment_ts.
|
protected |
Align the read ends independently, but also try to form rescue alignments for each from the other. Return true if output obeys pair consistency and false otherwise. Produces topologically sorted multipath_alignment_ts.
|
protected |
|
protected |
Are two multipath alignments consistently placed based on the learned fragment length distribution?
|
protected |
Extracts a section of graph at a distance from the multipath_alignment_t based on the fragment length distribution and attempts to align the other paired read to it. If rescuing forward, assumes the provided multipath_alignment_t is the first read and vice versa if rescuing backward. Rescue constructs a conventional local alignment with gssw and converts the Alignment to a multipath_alignment_t. The multipath_alignment_t will be stored in the object passed by reference as an argument.
|
protected |
Use the rescue routine on strong suboptimal clusters to see if we can find a good secondary. Produces topologically sorted multipath_alignment_ts.
|
protected |
Before the fragment length distribution has been estimated, look for an unambiguous mapping of the reads using the single ended routine. If we find one, record the fragment length and report the pair; if we don't find one, add the read pair to a buffer instead of the output vector.
void vg::MultipathMapper::calibrate_mismapping_detection | ( | size_t | num_simulations, |
const vector< size_t > & | simulated_read_lengths | ||
) |
Map random sequences against the graph to calibrate a parameterized distribution that detects when mappings are likely to have occurred by chance
|
protected |
Estimates the number of equivalent mappings (including this one), which we may not have seen due to limits on the number of hits returned for a MEM.
|
protected |
Compute a mapping quality from a list of scores, using the selected method. Optionally considers non-present duplicates of the scores encoded as multiplicities
|
protected |
Returns the union of the intervals on the read that a cluster covers in sorted order.
|
protected |
Compute the approximate distance between two multipath alignments. If either is unmapped, or the distance cannot be obtained, returns numeric_limits<int64_t>::max().
|
protected |
Detects if each pair can be assigned to a consistent strand of a path, and if not removes them. Also inverts the distances in the cluster pairs vector according to the strand
|
protected |
Estimates the number of equivalent mappings (including this one), which we may not have seen due to unexplored rescues.
|
protected |
Return a graph (on the heap) that contains a cluster. The paired bool indicates whether the graph is known to be connected (but it is possible for the graph to be connected and have it return false)
|
protected |
Extract a graph that is guaranteed to contain all local alignments that include the MEMs of the cluster. The paired bool indicates whether the graph is known to be connected (but it is possible for the graph to be connected and have it return false)
|
protected |
Use the algorithm implied by the mapper settings to extract a subgraph to perform a rescue alignment against.
|
protected |
Extract a graph with an algorithm that tries to extract not much more than what is required to contain the cluster in a single connected component (can be slower than the maximal algorithm for alignments that require large indels). The paired bool indicates whether the graph is known to be connected (but it is possible for the graph to be connected and have it return false)
|
protected |
Return exact matches according to the object's parameters. If using the fan-out algorithm, we can optionally leave fan-out MEMs intact and return a vector of their breaks.
|
protected |
|
protected |
|
protected |
Computes the log-likelihood of a given fragment length in the trained distribution.
|
protected |
Use the oriented distance clusterer or the TVS clusterer to cluster pairs of clusters. Assumes that the fragment length distribution has been estimated and fixed.
|
protected |
Use the oriented distance clusterer or the TVS clusterer to cluster MEMs depending on parameters. If using the oriented distance clusterer, must also provide an oriented distance measurer.
|
protected |
Get a thread_local RRMemo with these parameters.
|
protected |
|
protected |
|
protected |
void vg::MultipathMapper::init_band_padding_memo | ( | ) |
Should be called once after construction, or any time the band padding multiplier is changed.
|
protected |
Is this a consistent inter-pair distance based on the learned fragment length distribution?
|
protected |
Would an alignment this good be expected against a graph this big by chance alone.
|
protected |
Would an alignment this good be expected against a graph this big by chance alone.
|
protected |
Removes the sections of an Alignment's path within snarls and re-aligns them with multiple traceback to create a multipath alignment with non-trivial topology. Guarantees that the resulting multipath_alignment_t is in topological order.
|
protected |
Returns a vector of log-likelihoods for each mapping.
Get all the linearizations we are going to work with, possibly with duplicates. The first alignment will be optimal.
|
protected |
Merge the rescued mappings into the output vector and deduplicate pairs.
|
protected |
Make a multipath alignment of the read against the indicated graph and add it to the list of multimappings. Does NOT necessarily produce a multipath_alignment_t in topological order.
void vg::MultipathMapper::multipath_map | ( | const Alignment & | alignment, |
vector< multipath_alignment_t > & | multipath_alns_out | ||
) |
Map read in alignment to graph and make multipath alignments.
|
protected |
Wrapped internal function that allows some code paths to circumvent the current mapping quality method option.
void vg::MultipathMapper::multipath_map_paired | ( | const Alignment & | alignment1, |
const Alignment & | alignment2, | ||
vector< pair< multipath_alignment_t, multipath_alignment_t >> & | multipath_aln_pairs_out, | ||
vector< pair< Alignment, Alignment >> & | ambiguous_pair_buffer | ||
) |
Map a paired read to the graph and make paired multipath alignments. Assumes reads are on the same strand of the DNA/RNA molecule. If the fragment length distribution is still being estimated and the pair cannot be mapped unambiguously, adds the reads to a buffer for ambiguous pairs and does not output any multipath alignments.
|
protected |
Estimates the number of equivalent pair mappings (including this one), which we may not have seen due to limits on the number of hits returned for a MEM.
|
protected |
Returns a vector of log-likelihoods for each pair mapping.
|
protected |
A restrained estimate of the amount of gap we would like to align for a read tail.
|
protected |
A scaling of a score so that it approximately follows the distribution of the longest match in p-value test.
|
protected |
Extracts a subgraph around each cluster of MEMs that encompasses any graph position reachable (according to the Mapper's aligner) with local alignment anchored at the MEMs. If any subgraphs overlap, they are merged into one subgraph. Returns a vector of all the merged cluster subgraphs, their MEMs assigned from the mems vector according to the MEMs' hits, and their read coverages in bp. The caller must delete the VG objects produced!
|
protected |
The approximate p-value for a match length of the given size against the current graph.
|
staticprotected |
Computes the number of read bases a cluster of MEM hits covers.
|
protected |
Computes the Z-score of the number of matches against an equal length random DNA string.
|
protected |
Reorganizes the fan-out breaks into the format that MultipathAlignmentGraph wants it in.
void vg::MultipathMapper::reduce_to_single_path | ( | const multipath_alignment_t & | multipath_aln, |
vector< Alignment > & | alns_out, | ||
size_t | max_number | ||
) | const |
Given a mapped multipath_alignment_t, reduce it to up to max_number + 1 nonoverlapping single path alignments, with mapping qualities accounting for positional uncertainty between them. Even if the read is unmapped, there will always be at least one (possibly score 0) output alignment.
void vg::MultipathMapper::set_alignment_scores | ( | const int8_t * | score_matrix, |
int8_t | gap_open, | ||
int8_t | gap_extend, | ||
int8_t | full_length_bonus | ||
) |
Set the aligner scoring parameters and create the stored aligner instances. The score matrix should be a 4 x 4 array in the order (ACGT).
void vg::MultipathMapper::set_alignment_scores | ( | int8_t | match, |
int8_t | mismatch, | ||
int8_t | gap_open, | ||
int8_t | gap_extend, | ||
int8_t | full_length_bonus | ||
) |
Set all the aligner scoring parameters and create the stored aligner instances.
void vg::MultipathMapper::set_alignment_scores | ( | std::istream & | matrix_stream, |
int8_t | gap_open, | ||
int8_t | gap_extend, | ||
int8_t | full_length_bonus | ||
) |
Set the aligner scoring parameters and create the stored aligner instances. The stream should contain a 4 x 4 whitespace-separated substitution matrix (in the order ACGT).
void vg::MultipathMapper::set_automatic_min_clustering_length | ( | double | random_mem_probability = 0.5 | ) |
Sets the minimum clustering MEM length to the approximate length that a MEM would have to be to have at most the given probability of occurring in random sequence of the same size as the graph
|
protected |
Return true if any of the initial positions of the source Subpaths are shared between the two multipath alignments
|
protected |
Sorts mappings by score and stores the mapping quality of the optimal alignment in the multipath_alignment_t object. Optionally also sorts a vector of indexes to keep track of the cluster-of-origin. Allows multipath alignments where the best single path alignment is leaving the read unmapped. multipath_alignment_ts MUST be topologically sorted.
|
protected |
Sorts mappings by score and stores the mapping quality of the optimal alignment in the multipath_alignment_t object. If there are ties between scores, breaks them by the expected distance between pairs as computed by the OrientedDistanceClusterer::cluster_pairs function (modified cluster_pairs vector). Allows multipath alignments where the best single path alignment is leaving the read unmapped. multipath_alignment_ts MUST be topologically sorted. Optionally considers non-present duplicates of the scores encoded as multiplicities.
|
protected |
If there are any multipath_alignment_ts with multiple connected components, split them up and add them to the return vector. Properly handles multipath_alignment_ts that are unmapped. Does not depend on or guarantee topological order in the multipath_alignment_ts.
|
protected |
If there are any multipath_alignment_ts with multiple connected components, split them up and add them to the return vector. Also measure the distance between them and add a record to the cluster pairs vector. Properly handles multipath_alignment_ts that are unmapped. Does not depend on or guarantee topological order in the multipath_alignment_ts.
|
protected |
Remove the full length bonus from all source or sink subpaths that received it.
|
protected |
bool vg::MultipathMapper::agglomerate_multipath_alns = false |
size_t vg::MultipathMapper::alt_anchor_max_length_diff = 5 |
bool vg::MultipathMapper::always_check_population = false |
|
protected |
size_t vg::MultipathMapper::band_padding_memo_size = 2000 |
double vg::MultipathMapper::band_padding_multiplier = 1.0 |
bool vg::MultipathMapper::component_min_dist = false |
|
protected |
|
protected |
bool vg::MultipathMapper::do_spliced_alignment = false |
bool vg::MultipathMapper::dynamic_max_alt_alns = false |
size_t vg::MultipathMapper::force_haplotype_count = 0 |
size_t vg::MultipathMapper::fragment_length_warning_factor = 0 |
|
staticprotected |
bool vg::MultipathMapper::get_rescue_graph_from_paths = true |
bool vg::MultipathMapper::greedy_min_dist = false |
double vg::MultipathMapper::log_likelihood_approx_factor = 1.0 |
double vg::MultipathMapper::mapq_scaling_factor = 1.0 |
size_t vg::MultipathMapper::max_alignment_gap = 5000 |
size_t vg::MultipathMapper::max_alt_mappings = 1 |
size_t vg::MultipathMapper::max_branch_trim_length = 1 |
size_t vg::MultipathMapper::max_expected_dist_approx_error = 8 |
double vg::MultipathMapper::max_exponential_rate_intercept = 0.612045 |
double vg::MultipathMapper::max_exponential_rate_slope = 0.000555181 |
double vg::MultipathMapper::max_exponential_shape_intercept = 12.136 |
double vg::MultipathMapper::max_exponential_shape_slope = 0.0113637 |
int vg::MultipathMapper::max_fanout_base_quality = 20 |
int vg::MultipathMapper::max_fans_out = 5 |
int64_t vg::MultipathMapper::max_intron_length = 1 << 18 |
double vg::MultipathMapper::max_mapping_p_value = 0.0001 |
size_t vg::MultipathMapper::max_p_value_memo_size = 500 |
size_t vg::MultipathMapper::max_rescue_attempts = 32 |
double vg::MultipathMapper::max_rescue_p_value = 0.1 |
size_t vg::MultipathMapper::max_single_end_mappings_for_rescue = 64 |
int64_t vg::MultipathMapper::max_snarl_cut_size = 5 |
int64_t vg::MultipathMapper::max_softclip_overlap = 8 |
int64_t vg::MultipathMapper::max_splice_overhang = 3 |
double vg::MultipathMapper::max_splice_p_value = 0.001 |
double vg::MultipathMapper::max_suboptimal_path_score_ratio = 2.0 |
double vg::MultipathMapper::mem_coverage_min_ratio = 0.5 |
size_t vg::MultipathMapper::min_clustering_mem_length = 0 |
size_t vg::MultipathMapper::min_median_mem_coverage_for_split = 0 |
int64_t vg::MultipathMapper::min_softclip_length_for_splice = 16 |
size_t vg::MultipathMapper::min_tail_anchor_length = 3 |
bool vg::MultipathMapper::no_clustering = false |
int32_t vg::MultipathMapper::num_alt_alns = 4 |
size_t vg::MultipathMapper::num_mapping_attempts = 48 |
size_t vg::MultipathMapper::order_length_repeat_hit_max = 0 |
|
staticprotected |
|
protected |
|
staticprotected |
double vg::MultipathMapper::pessimistic_gap_multiplier = 0.0 |
size_t vg::MultipathMapper::plausible_rescue_cluster_coverage_diff = 5 |
size_t vg::MultipathMapper::population_max_paths = 10 |
size_t vg::MultipathMapper::population_paths_hard_cap = 1000 |
double vg::MultipathMapper::recombination_penalty = 20.7 |
bool vg::MultipathMapper::report_group_mapq = false |
double vg::MultipathMapper::rescue_graph_std_devs = 6.0 |
size_t vg::MultipathMapper::rescue_only_anchor_max = 16 |
size_t vg::MultipathMapper::rescue_only_min = 128 |
bool vg::MultipathMapper::restrained_graph_extraction = false |
size_t vg::MultipathMapper::reversing_walk_length = 0 |
|
staticprotected |
Memos used by population model.
size_t vg::MultipathMapper::secondary_rescue_attempts = 4 |
double vg::MultipathMapper::secondary_rescue_score_diff = 1.0 |
int32_t vg::MultipathMapper::secondary_rescue_subopt_diff = 10 |
bool vg::MultipathMapper::simplify_topologies = false |
|
protected |
|
protected |
size_t vg::MultipathMapper::stripped_match_alg_max_length = 0 |
size_t vg::MultipathMapper::stripped_match_alg_strip_length = 16 |
size_t vg::MultipathMapper::stripped_match_alg_target_count = 5 |
bool vg::MultipathMapper::suppress_cluster_merging = false |
bool vg::MultipathMapper::suppress_mismapping_detection = false |
bool vg::MultipathMapper::suppress_multicomponent_splitting = false |
bool vg::MultipathMapper::suppress_p_value_memoization = false |
bool vg::MultipathMapper::suppress_tail_anchors = false |
bool vg::MultipathMapper::top_tracebacks = false |
double vg::MultipathMapper::unused_cluster_multiplicity_mq_limit = 7.0 |
bool vg::MultipathMapper::use_fanout_match_alg = false |
bool vg::MultipathMapper::use_min_dist_clusterer = false |
bool vg::MultipathMapper::use_pessimistic_tail_alignment = false |
bool vg::MultipathMapper::use_population_mapqs = false |
bool vg::MultipathMapper::use_stripped_match_alg = false |
bool vg::MultipathMapper::use_tvs_clusterer = false |