vg
tools for working with variation graphs
Public Types | Public Member Functions | Public Attributes | Protected Member Functions | Static Protected Member Functions | Protected Attributes | Static Protected Attributes | List of all members
vg::MultipathMapper Class Reference

#include <multipath_mapper.hpp>

Inheritance diagram for vg::MultipathMapper:
vg::BaseMapper vg::AlignerClient vg::PairedEndMapper

Public Types

using memcluster_t = vector< pair< const MaximalExactMatch *, pos_t > >
 We often pass around clusters of MEMs and their graph positions. More...
 
using clustergraph_t = tuple< bdsg::HashGraph *, memcluster_t, size_t >
 

Public Member Functions

 MultipathMapper (PathPositionHandleGraph *graph, gcsa::GCSA *gcsa_index, gcsa::LCPArray *lcp_array, haplo::ScoreProvider *haplo_score_provider=nullptr, SnarlManager *snarl_manager=nullptr, MinimumDistanceIndex *distance_index=nullptr)
 
 ~MultipathMapper ()
 
void multipath_map (const Alignment &alignment, vector< multipath_alignment_t > &multipath_alns_out)
 Map read in alignment to graph and make multipath alignments. More...
 
void multipath_map_paired (const Alignment &alignment1, const Alignment &alignment2, vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs_out, vector< pair< Alignment, Alignment >> &ambiguous_pair_buffer)
 
void reduce_to_single_path (const multipath_alignment_t &multipath_aln, vector< Alignment > &alns_out, size_t max_number) const
 
void set_automatic_min_clustering_length (double random_mem_probability=0.5)
 
void calibrate_mismapping_detection (size_t num_simulations, const vector< size_t > &simulated_read_lengths)
 
void init_band_padding_memo ()
 Should be called once after construction, or any time the band padding multiplier is changed. More...
 
- Public Member Functions inherited from vg::BaseMapper
 BaseMapper (PathPositionHandleGraph *xidex, gcsa::GCSA *g, gcsa::LCPArray *a, haplo::ScoreProvider *haplo_score_provider=nullptr)
 
 BaseMapper (void)
 
int random_match_length (double chance_random)
 
void set_alignment_scores (int8_t match, int8_t mismatch, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus, double haplotype_consistency_exponent=1)
 Override alignment score setting to support haplotype consistency exponent. More...
 
void set_alignment_scores (istream &matrix_stream, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus, double haplotype_consistency_exponent=1)
 Same, but loading a 4x4 substitution score matrix from a stream. More...
 
void set_cache_size (int new_cache_size)
 
vector< MaximalExactMatch > find_mems_deep (string::const_iterator seq_begin, string::const_iterator seq_end, double &lcp_avg, double &fraction_filtered, int max_mem_length=0, int min_mem_length=1, int reseed_length=0, bool use_lcp_reseed_heuristic=false, bool use_diff_based_fast_reseed=false, bool include_parent_in_sub_mem_count=false, bool record_max_lcp=false, int reseed_below_count=0)
 
vector< MaximalExactMatch > find_mems_simple (string::const_iterator seq_begin, string::const_iterator seq_end, int max_mem_length=0, int min_mem_length=1, int reseed_length=0)
 
vector< MaximalExactMatch > find_stripped_matches (string::const_iterator seq_begin, string::const_iterator seq_end, size_t strip_length, size_t max_match_length, size_t target_count)
 
vector< MaximalExactMatch > find_fanout_mems (string::const_iterator seq_begin, string::const_iterator seq_end, string::const_iterator qual_begin, int max_fans_out, char max_fanout_base_quality)
 
vector< pos_t > walk_fanout_path (string::const_iterator begin, string::const_iterator end, const list< pair< string::const_iterator, char >> &fanout_breaks, gcsa::node_type pos)
 
void rescue_high_count_order_length_mems (vector< MaximalExactMatch > &mems, size_t max_rescue_hit_count)
 
void precollapse_order_length_runs (string::const_iterator seq_begin, vector< MaximalExactMatch > &mems)
 
void prefilter_redundant_sub_mems (vector< MaximalExactMatch > &mems, vector< pair< int, vector< size_t >>> &sub_mem_containment_graph)
 
void find_sub_mems (const vector< MaximalExactMatch > &mems, int parent_layer_begin, int parent_layer_end, int mem_idx, string::const_iterator next_mem_end, int min_mem_length, vector< pair< MaximalExactMatch, vector< size_t >>> &sub_mems_out)
 
void find_sub_mems_fast (const vector< MaximalExactMatch > &mems, int parent_layer_begin, int parent_layer_end, int mem_idx, string::const_iterator leftmost_guaranteed_disjoint_bound, string::const_iterator leftmost_seeding_bound, int min_sub_mem_length, vector< pair< MaximalExactMatch, vector< size_t >>> &sub_mems_out)
 
set< pos_t > sequence_positions (const string &seq)
 
size_t get_adaptive_min_reseed_length (size_t parent_mem_length)
 
void apply_haplotype_consistency_scores (const vector< Alignment * > &alns)
 
- Public Member Functions inherited from vg::AlignerClient
void set_alignment_scores (int8_t match, int8_t mismatch, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus)
 Set all the aligner scoring parameters and create the stored aligner instances. More...
 
void set_alignment_scores (std::istream &matrix_stream, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus)
 
void set_alignment_scores (const int8_t *score_matrix, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus)
 
- Public Member Functions inherited from vg::PairedEndMapper
void set_fragment_length_distr_params (size_t maximum_sample_size=1000, size_t reestimation_frequency=1000, double robust_estimation_fraction=0.95)
 
bool has_fixed_fragment_length_distr ()
 Returns true if fragment length distribution has been fixed. More...
 
void force_fragment_length_distr (double mean, double stddev)
 

Public Attributes

size_t max_branch_trim_length = 1
 
int64_t max_snarl_cut_size = 5
 
bool suppress_tail_anchors = false
 
size_t min_tail_anchor_length = 3
 
double band_padding_multiplier = 1.0
 
double pessimistic_tail_gap_multiplier = 0.0
 
size_t max_expected_dist_approx_error = 8
 
int32_t num_alt_alns = 4
 
double mem_coverage_min_ratio = 0.5
 
double max_suboptimal_path_score_ratio = 2.0
 
size_t num_mapping_attempts = 48
 
double log_likelihood_approx_factor = 1.0
 
size_t min_clustering_mem_length = 0
 
bool use_stripped_match_alg = false
 
size_t stripped_match_alg_strip_length = 16
 
size_t stripped_match_alg_max_length = 0
 
size_t stripped_match_alg_target_count = 5
 
bool use_fanout_match_alg = false
 
int max_fanout_base_quality = 20
 
int max_fans_out = 5
 
size_t max_p_value_memo_size = 500
 
size_t band_padding_memo_size = 2000
 
bool use_weibull_calibration = false
 
double max_exponential_rate_intercept = 0.7612
 
double max_exponential_rate_slope = 0.0001496
 
double max_exponential_shape_intercept = 12.37
 
double max_exponential_shape_slope = 0.007191
 
double weibull_scale_intercept = 1.05
 
double weibull_scale_slope = 0.0601
 
double weibull_shape_intercept = -0.176
 
double weibull_shape_slope = 0.199
 
double weibull_offset_intercept = 2.342
 
double weibull_offset_slope = 0.07168
 
double max_mapping_p_value = 0.00001
 
size_t max_alt_mappings = 1
 
size_t max_single_end_mappings_for_rescue = 64
 
size_t max_rescue_attempts = 32
 
size_t plausible_rescue_cluster_coverage_diff = 5
 
size_t secondary_rescue_attempts = 4
 
double secondary_rescue_score_diff = 1.0
 
double mapq_scaling_factor = 1.0
 
bool report_group_mapq = false
 
bool use_population_mapqs = false
 
size_t force_haplotype_count = 0
 
bool always_check_population = false
 
size_t population_max_paths = 10
 
size_t population_paths_hard_cap = 1000
 
bool top_tracebacks = false
 
double recombination_penalty = 20.7
 
size_t rescue_only_min = 128
 
size_t rescue_only_anchor_max = 16
 
size_t order_length_repeat_hit_max = 0
 
int32_t secondary_rescue_subopt_diff = 10
 
size_t min_median_mem_coverage_for_split = 0
 
bool suppress_cluster_merging = false
 
size_t alt_anchor_max_length_diff = 5
 
bool dynamic_max_alt_alns = false
 
bool simplify_topologies = false
 
bool use_tvs_clusterer = false
 
bool use_min_dist_clusterer = false
 
bool greedy_min_dist = false
 
bool component_min_dist = false
 
bool no_clustering = false
 
size_t reversing_walk_length = 0
 
bool suppress_p_value_memoization = false
 
size_t fragment_length_warning_factor = 0
 
size_t max_alignment_gap = 5000
 
bool suppress_mismapping_detection = false
 
- Public Attributes inherited from vg::BaseMapper
int sub_mem_thinning_burn_in = 16
 
int sub_mem_count_thinning = 4
 
int min_mem_length
 
int mem_reseed_length
 
bool fast_reseed = true
 
double fast_reseed_length_diff = 0.45
 
bool adaptive_reseed_diff = true
 
double adaptive_diff_exponent = 0.065
 
int hit_max = 0
 
int hard_hit_max = 0
 
bool use_approx_sub_mem_count = false
 
bool prefilter_redundant_hits = true
 
int max_sub_mem_recursion_depth = 2
 
bool use_greedy_mem_restarts = false
 
int greedy_restart_min_length = 40
 
int greedy_restart_max_count = 2
 
int greedy_restart_max_lcp = 0
 
bool greedy_restart_assume_substitution = false
 
int unpaired_penalty = 17
 
bool precollapse_order_length_hits = true
 
double avg_node_length = 0
 
size_t total_seq_length = 0
 
double recombination_penalty = 20.7
 
bool strip_bonuses
 
bool assume_acyclic
 
MappingQualityMethod mapping_quality_method
 
int max_mapping_quality
 
bool exclude_unaligned = false
 
bool debug = false
 Set to enable debugging messages to cerr from the mapper, so a user can understand why a read maps the way it does. More...
 
PathPositionHandleGraph * xindex = nullptr
 
gcsa::GCSA * gcsa = nullptr
 
gcsa::LCPArray * lcp = nullptr
 
haplo::ScoreProvider * haplo_score_provider = nullptr
 
double haplotype_consistency_exponent = 1
 
- Public Attributes inherited from vg::AlignerClient
bool adjust_alignments_for_base_quality = false
 

Protected Member Functions

void multipath_map_internal (const Alignment &alignment, MappingQualityMethod mapq_method, vector< multipath_alignment_t > &multipath_alns_out)
 
void attempt_unpaired_multipath_map_of_pair (const Alignment &alignment1, const Alignment &alignment2, vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs_out, vector< pair< Alignment, Alignment >> &ambiguous_pair_buffer)
 
bool attempt_rescue (const multipath_alignment_t &multipath_aln, const Alignment &other_aln, bool rescue_forward, multipath_alignment_t &rescue_multipath_aln)
 
void align_to_cluster_graphs (const Alignment &alignment, MappingQualityMethod mapq_method, vector< clustergraph_t > &cluster_graphs, vector< multipath_alignment_t > &multipath_alns_out, size_t num_mapping_attempts, vector< size_t > *cluster_idxs=nullptr)
 
void align_to_cluster_graph_pairs (const Alignment &alignment1, const Alignment &alignment2, vector< clustergraph_t > &cluster_graphs1, vector< clustergraph_t > &cluster_graphs2, vector< pair< pair< size_t, size_t >, int64_t >> &cluster_pairs, vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs_out, vector< pair< size_t, size_t >> &duplicate_pairs_out)
 
bool align_to_cluster_graphs_with_rescue (const Alignment &alignment1, const Alignment &alignment2, vector< clustergraph_t > &cluster_graphs1, vector< clustergraph_t > &cluster_graphs2, bool block_rescue_from_1, bool block_rescue_from_2, vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs_out, vector< pair< pair< size_t, size_t >, int64_t >> &pair_distances_out, vector< double > &pair_multiplicities_out)
 
void attempt_rescue_for_secondaries (const Alignment &alignment1, const Alignment &alignment2, vector< clustergraph_t > &cluster_graphs1, vector< clustergraph_t > &cluster_graphs2, vector< pair< size_t, size_t >> &duplicate_pairs, vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs_out, vector< pair< pair< size_t, size_t >, int64_t >> &cluster_pairs)
 
void attempt_rescue_of_repeat_from_non_repeat (const Alignment &alignment1, const Alignment &alignment2, const vector< MaximalExactMatch > &mems1, const vector< MaximalExactMatch > &mems2, bool do_repeat_rescue_from_1, bool do_repeat_rescue_from_2, vector< memcluster_t > &clusters1, vector< memcluster_t > &clusters2, vector< clustergraph_t > &cluster_graphs1, vector< clustergraph_t > &cluster_graphs2, vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs_out, vector< pair< pair< size_t, size_t >, int64_t >> &pair_distances, OrientedDistanceMeasurer &distance_measurer)
 
void merge_rescued_mappings (vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs_out, vector< pair< pair< size_t, size_t >, int64_t >> &cluster_pairs, vector< pair< multipath_alignment_t, multipath_alignment_t >> &rescued_multipath_aln_pairs, vector< pair< pair< size_t, size_t >, int64_t >> &rescued_cluster_pairs, vector< double > &rescued_multiplicities) const
 Merge the rescued mappings into the output vector and deduplicate pairs. More...
 
vector< memcluster_t > get_clusters (const Alignment &alignment, const vector< MaximalExactMatch > &mems, OrientedDistanceMeasurer *distance_measurer=nullptr) const
 
vector< pair< pair< size_t, size_t >, int64_t > > get_cluster_pairs (const Alignment &alignment1, const Alignment &alignment2, vector< clustergraph_t > &cluster_graphs1, vector< clustergraph_t > &cluster_graphs2, OrientedDistanceMeasurer *distance_measurer=nullptr)
 
vector< clustergraph_t > query_cluster_graphs (const Alignment &alignment, const vector< MaximalExactMatch > &mems, const vector< memcluster_t > &clusters)
 
void split_multicomponent_alignments (vector< multipath_alignment_t > &multipath_alns_out, vector< size_t > *cluster_idxs=nullptr) const
 
void split_multicomponent_alignments (vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs_out, vector< pair< pair< size_t, size_t >, int64_t >> &cluster_pairs) const
 
void multipath_align (const Alignment &alignment, const bdsg::HashGraph *graph, memcluster_t &graph_mems, multipath_alignment_t &multipath_aln_out) const
 
void make_nontrivial_multipath_alignment (const Alignment &alignment, const HandleGraph &subgraph, const function< pair< id_t, bool >(id_t)> &translator, SnarlManager &snarl_manager, multipath_alignment_t &multipath_aln_out) const
 
void strip_full_length_bonuses (multipath_alignment_t &multipath_aln) const
 Remove the full length bonus from all source or sink subpaths that received it. More...
 
int32_t compute_raw_mapping_quality_from_scores (const vector< double > &scores, MappingQualityMethod mapq_method, bool have_qualities, const vector< double > *multiplicities=nullptr) const
 
void sort_and_compute_mapping_quality (vector< multipath_alignment_t > &multipath_alns, MappingQualityMethod mapq_method, vector< size_t > *cluster_idxs=nullptr) const
 
void sort_and_compute_mapping_quality (vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs, vector< pair< pair< size_t, size_t >, int64_t >> &cluster_pairs, vector< pair< size_t, size_t >> *duplicate_pairs_out=nullptr, vector< double > *pair_multiplicities=nullptr) const
 
void cap_mapping_quality_by_rescue_probability (vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs_out, vector< pair< pair< size_t, size_t >, int64_t >> &cluster_pairs, vector< clustergraph_t > &cluster_graphs1, vector< clustergraph_t > &cluster_graphs2, bool from_secondary_rescue) const
 
void cap_mapping_quality_by_hit_sampling_probability (vector< multipath_alignment_t > &multipath_alns_out, vector< size_t > &cluster_idxs, vector< clustergraph_t > &cluster_graphs) const
 
void cap_mapping_quality_by_hit_sampling_probability (vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs_out, vector< pair< pair< size_t, size_t >, int64_t >> &cluster_pairs, vector< clustergraph_t > &cluster_graphs1, vector< clustergraph_t > &cluster_graphs2, bool did_secondary_rescue) const
 
double prob_equivalent_clusters_hits_missed (const memcluster_t &cluster) const
 
double fragment_length_log_likelihood (int64_t length) const
 Computes the log-likelihood of a given fragment length in the trained distribution. More...
 
bool likely_mismapping (const multipath_alignment_t &multipath_aln)
 Would an alignment this good be expected against a graph this big by chance alone? More...
 
size_t pseudo_length (const multipath_alignment_t &multipath_aln) const
 A scaling of a score so that it approximately follows the distribution of the longest match in p-value test. More...
 
double random_match_p_value (size_t match_length, size_t read_length)
 The approximate p-value for a match length of the given size against the current graph. More...
 
int64_t distance_between (const multipath_alignment_t &multipath_aln_1, const multipath_alignment_t &multipath_aln_2, bool full_fragment=false, bool forward_strand=false) const
 
bool are_consistent (const multipath_alignment_t &multipath_aln_1, const multipath_alignment_t &multipath_aln_2) const
 Are two multipath alignments consistently placed based on the learned fragment length distribution? More...
 
bool is_consistent (int64_t distance) const
 Is this a consistent inter-pair distance based on the learned fragment length distribution? More...
 
double read_coverage_z_score (int64_t coverage, const Alignment &alignment) const
 Computes the Z-score of the number of matches against an equal length random DNA string. More...
 
bool share_terminal_positions (const multipath_alignment_t &multipath_aln_1, const multipath_alignment_t &multipath_aln_2) const
 
haploMath::RRMemo & get_rr_memo (double recombination_penalty, size_t population_size) const
 Get a thread_local RRMemo with these parameters. More...
 
void establish_strand_consistency (vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs, vector< pair< pair< size_t, size_t >, int64_t >> &cluster_pairs)
 
vector< MaximalExactMatch > find_mems (const Alignment &alignment)
 Return exact matches according to the object's parameters. More...
 
- Protected Member Functions inherited from vg::AlignerClient
 AlignerClient (double gc_content_estimate=vg::default_gc_content)
 
const GSSWAligner * get_aligner (bool have_qualities=true) const
 
const QualAdjAligner * get_qual_adj_aligner () const
 
const Aligner * get_regular_aligner () const
 

Static Protected Member Functions

static int64_t read_coverage (const memcluster_t &mem_hits)
 Computes the number of read bases a cluster of MEM hits covers. More...
 

Protected Attributes

SnarlManager * snarl_manager
 
MinimumDistanceIndex * distance_index
 
PathComponentIndex path_component_index
 
vector< size_t > band_padding_memo
 
- Protected Attributes inherited from vg::PairedEndMapper
FragmentLengthDistribution fragment_length_distr
 Holds the actual fragment length distribution and estimation information. More...
 

Static Protected Attributes

static thread_local unordered_map< pair< double, size_t >, haploMath::RRMemo > rr_memos
 Memos used by population model. More...
 
static thread_local unordered_map< pair< size_t, size_t >, double > p_value_memo
 

Additional Inherited Members

- Static Public Member Functions inherited from vg::BaseMapper
static double estimate_gc_content (const gcsa::GCSA *gcsa)
 
- Static Public Member Functions inherited from vg::AlignerClient
static int8_t * parse_matrix (std::istream &matrix_stream)
 Allocates an array to hold a 4x4 substitution matrix and returns it. More...
 
- Static Public Attributes inherited from vg::BaseMapper
static thread_local vector< size_t > adaptive_reseed_length_memo
 

Member Typedef Documentation

◆ clustergraph_t

This represents a graph for a cluster, and holds a pointer to the actual extracted graph, a list of assigned MEMs, and the number of bases of read coverage that that MEM cluster provides (which serves as a priority).

◆ memcluster_t

We often pass around clusters of MEMs and their graph positions.

Constructor & Destructor Documentation

◆ MultipathMapper()

vg::MultipathMapper::MultipathMapper ( PathPositionHandleGraph *  graph,
gcsa::GCSA *  gcsa_index,
gcsa::LCPArray *  lcp_array,
haplo::ScoreProvider *  haplo_score_provider = nullptr,
SnarlManager *  snarl_manager = nullptr,
MinimumDistanceIndex *  distance_index = nullptr 
)

◆ ~MultipathMapper()

vg::MultipathMapper::~MultipathMapper ( )

Member Function Documentation

◆ align_to_cluster_graph_pairs()

void vg::MultipathMapper::align_to_cluster_graph_pairs ( const Alignment &  alignment1,
const Alignment &  alignment2,
vector< clustergraph_t > &  cluster_graphs1,
vector< clustergraph_t > &  cluster_graphs2,
vector< pair< pair< size_t, size_t >, int64_t >> &  cluster_pairs,
vector< pair< multipath_alignment_t, multipath_alignment_t >> &  multipath_aln_pairs_out,
vector< pair< size_t, size_t >> &  duplicate_pairs_out 
)
protected

After clustering MEMs, extracting graphs, assigning hits to cluster graphs, and determining which cluster graph pairs meet the fragment length distance constraints, perform multipath alignment. Produces topologically sorted multipath_alignment_ts.

◆ align_to_cluster_graphs()

void vg::MultipathMapper::align_to_cluster_graphs ( const Alignment &  alignment,
MappingQualityMethod  mapq_method,
vector< clustergraph_t > &  cluster_graphs,
vector< multipath_alignment_t > &  multipath_alns_out,
size_t  num_mapping_attempts,
vector< size_t > *  cluster_idxs = nullptr 
)
protected

After clustering MEMs, extracting graphs, and assigning hits to cluster graphs, perform multipath alignment. Produces topologically sorted multipath_alignment_ts.

◆ align_to_cluster_graphs_with_rescue()

bool vg::MultipathMapper::align_to_cluster_graphs_with_rescue ( const Alignment &  alignment1,
const Alignment &  alignment2,
vector< clustergraph_t > &  cluster_graphs1,
vector< clustergraph_t > &  cluster_graphs2,
bool  block_rescue_from_1,
bool  block_rescue_from_2,
vector< pair< multipath_alignment_t, multipath_alignment_t >> &  multipath_aln_pairs_out,
vector< pair< pair< size_t, size_t >, int64_t >> &  pair_distances_out,
vector< double > &  pair_multiplicities_out 
)
protected

Align the read ends independently, but also try to form rescue alignments for each from the other. Return true if output obeys pair consistency and false otherwise. Produces topologically sorted multipath_alignment_ts.

◆ are_consistent()

bool vg::MultipathMapper::are_consistent ( const multipath_alignment_t &  multipath_aln_1,
const multipath_alignment_t &  multipath_aln_2 
) const
protected

Are two multipath alignments consistently placed based on the learned fragment length distribution?

◆ attempt_rescue()

bool vg::MultipathMapper::attempt_rescue ( const multipath_alignment_t &  multipath_aln,
const Alignment &  other_aln,
bool  rescue_forward,
multipath_alignment_t &  rescue_multipath_aln 
)
protected

Extracts a section of graph at a distance from the multipath_alignment_t based on the fragment length distribution and attempts to align the other paired read to it. If rescuing forward, assumes the provided multipath_alignment_t is the first read and vice versa if rescuing backward. Rescue constructs a conventional local alignment with gssw and converts the Alignment to a multipath_alignment_t. The multipath_alignment_t will be stored in the object passed by reference as an argument.

◆ attempt_rescue_for_secondaries()

void vg::MultipathMapper::attempt_rescue_for_secondaries ( const Alignment &  alignment1,
const Alignment &  alignment2,
vector< clustergraph_t > &  cluster_graphs1,
vector< clustergraph_t > &  cluster_graphs2,
vector< pair< size_t, size_t >> &  duplicate_pairs,
vector< pair< multipath_alignment_t, multipath_alignment_t >> &  multipath_aln_pairs_out,
vector< pair< pair< size_t, size_t >, int64_t >> &  cluster_pairs 
)
protected

Use the rescue routine on strong suboptimal clusters to see if we can find a good secondary. Produces topologically sorted multipath_alignment_ts.

◆ attempt_rescue_of_repeat_from_non_repeat()

void vg::MultipathMapper::attempt_rescue_of_repeat_from_non_repeat ( const Alignment &  alignment1,
const Alignment &  alignment2,
const vector< MaximalExactMatch > &  mems1,
const vector< MaximalExactMatch > &  mems2,
bool  do_repeat_rescue_from_1,
bool  do_repeat_rescue_from_2,
vector< memcluster_t > &  clusters1,
vector< memcluster_t > &  clusters2,
vector< clustergraph_t > &  cluster_graphs1,
vector< clustergraph_t > &  cluster_graphs2,
vector< pair< multipath_alignment_t, multipath_alignment_t >> &  multipath_aln_pairs_out,
vector< pair< pair< size_t, size_t >, int64_t >> &  pair_distances,
OrientedDistanceMeasurer &  distance_measurer 
)
protected

Cluster and extract subgraphs for (possibly) only one end, meant to be a non-repeat, and use them to rescue an alignment for the other end, meant to be a repeat. Produces topologically sorted multipath_alignment_ts.

◆ attempt_unpaired_multipath_map_of_pair()

void vg::MultipathMapper::attempt_unpaired_multipath_map_of_pair ( const Alignment &  alignment1,
const Alignment &  alignment2,
vector< pair< multipath_alignment_t, multipath_alignment_t >> &  multipath_aln_pairs_out,
vector< pair< Alignment, Alignment >> &  ambiguous_pair_buffer 
)
protected

Before the fragment length distribution has been estimated, look for an unambiguous mapping of the reads using the single-ended routine. If we find one, record the fragment length and report the pair; if we don't, add the read pair to a buffer instead of the output vector.

◆ calibrate_mismapping_detection()

void vg::MultipathMapper::calibrate_mismapping_detection ( size_t  num_simulations,
const vector< size_t > &  simulated_read_lengths 
)

Map random sequences against the graph to calibrate a parameterized distribution that detects when mappings are likely to have occurred by chance

◆ cap_mapping_quality_by_hit_sampling_probability() [1/2]

void vg::MultipathMapper::cap_mapping_quality_by_hit_sampling_probability ( vector< multipath_alignment_t > &  multipath_alns_out,
vector< size_t > &  cluster_idxs,
vector< clustergraph_t > &  cluster_graphs 
) const
protected

Estimates the probability that the correct cluster was not identified because of sub-sampling MEM hits and caps the mapping quality to this probability (in Phred scale)

◆ cap_mapping_quality_by_hit_sampling_probability() [2/2]

void vg::MultipathMapper::cap_mapping_quality_by_hit_sampling_probability ( vector< pair< multipath_alignment_t, multipath_alignment_t >> &  multipath_aln_pairs_out,
vector< pair< pair< size_t, size_t >, int64_t >> &  cluster_pairs,
vector< clustergraph_t > &  cluster_graphs1,
vector< clustergraph_t > &  cluster_graphs2,
bool  did_secondary_rescue 
) const
protected

Estimates the probability that the correct cluster pair was not identified because of sub-sampling MEM hits and caps the mapping quality to this probability (in Phred scale)

◆ cap_mapping_quality_by_rescue_probability()

void vg::MultipathMapper::cap_mapping_quality_by_rescue_probability ( vector< pair< multipath_alignment_t, multipath_alignment_t >> &  multipath_aln_pairs_out,
vector< pair< pair< size_t, size_t >, int64_t >> &  cluster_pairs,
vector< clustergraph_t > &  cluster_graphs1,
vector< clustergraph_t > &  cluster_graphs2,
bool  from_secondary_rescue 
) const
protected

Estimates the probability that the correct cluster was not chosen as a cluster to rescue from and caps the mapping quality to the minimum of the current mapping quality and this probability (in Phred scale)

◆ compute_raw_mapping_quality_from_scores()

int32_t vg::MultipathMapper::compute_raw_mapping_quality_from_scores ( const vector< double > &  scores,
MappingQualityMethod  mapq_method,
bool  have_qualities,
const vector< double > *  multiplicities = nullptr 
) const
protected

Compute a mapping quality from a list of scores, using the selected method. Optionally considers non-present duplicates of the scores encoded as multiplicities

◆ distance_between()

int64_t vg::MultipathMapper::distance_between ( const multipath_alignment_t multipath_aln_1,
const multipath_alignment_t multipath_aln_2,
bool  full_fragment = false,
bool  forward_strand = false 
) const
protected

Compute the approximate distance between two multipath alignments. If either is unmapped, or the distance cannot be obtained, returns numeric_limits<int64_t>::max().

◆ establish_strand_consistency()

void vg::MultipathMapper::establish_strand_consistency ( vector< pair< multipath_alignment_t, multipath_alignment_t >> &  multipath_aln_pairs,
vector< pair< pair< size_t, size_t >, int64_t >> &  cluster_pairs 
)
protected

Detects whether each pair can be assigned to a consistent strand of a path, and removes those that cannot. Also inverts the distances in the cluster pairs vector according to the strand.

◆ find_mems()

vector< MaximalExactMatch > vg::MultipathMapper::find_mems ( const Alignment &  alignment)
protected

Return exact matches according to the object's parameters.

◆ fragment_length_log_likelihood()

double vg::MultipathMapper::fragment_length_log_likelihood ( int64_t  length) const
protected

Computes the log-likelihood of a given fragment length in the trained distribution.

◆ get_cluster_pairs()

vector< pair< pair< size_t, size_t >, int64_t > > vg::MultipathMapper::get_cluster_pairs ( const Alignment &  alignment1,
const Alignment &  alignment2,
vector< clustergraph_t > &  cluster_graphs1,
vector< clustergraph_t > &  cluster_graphs2,
OrientedDistanceMeasurer *  distance_measurer = nullptr 
)
protected

Use the oriented distance clusterer or the TVS clusterer to cluster pairs of clusters. Assumes that the fragment length distribution has been estimated and fixed.

◆ get_clusters()

vector< MultipathMapper::memcluster_t > vg::MultipathMapper::get_clusters ( const Alignment &  alignment,
const vector< MaximalExactMatch > &  mems,
OrientedDistanceMeasurer *  distance_measurer = nullptr 
) const
protected

Use the oriented distance clusterer or the TVS clusterer to cluster MEMs depending on parameters. If using the oriented distance clusterer, must also provide an oriented distance measurer.

◆ get_rr_memo()

haploMath::RRMemo & vg::MultipathMapper::get_rr_memo ( double  recombination_penalty,
size_t  population_size 
) const
protected

Get a thread_local RRMemo with these parameters.

◆ init_band_padding_memo()

void vg::MultipathMapper::init_band_padding_memo ( )

Should be called once after construction, or any time the band padding multiplier is changed.

◆ is_consistent()

bool vg::MultipathMapper::is_consistent ( int64_t  distance) const
protected

Is this a consistent inter-pair distance based on the learned fragment length distribution?

◆ likely_mismapping()

bool vg::MultipathMapper::likely_mismapping ( const multipath_alignment_t &  multipath_aln)
protected

Would an alignment this good be expected against a graph this big by chance alone?

◆ make_nontrivial_multipath_alignment()

void vg::MultipathMapper::make_nontrivial_multipath_alignment ( const Alignment &  alignment,
const HandleGraph &  subgraph,
const function< pair< id_t, bool >(id_t)> &  translator,
SnarlManager &  snarl_manager,
multipath_alignment_t &  multipath_aln_out 
) const
protected

Removes the sections of an Alignment's path within snarls and re-aligns them with multiple traceback to create a multipath alignment with non-trivial topology. Guarantees that the resulting multipath_alignment_t is in topological order.

◆ merge_rescued_mappings()

void vg::MultipathMapper::merge_rescued_mappings ( vector< pair< multipath_alignment_t, multipath_alignment_t >> &  multipath_aln_pairs_out,
vector< pair< pair< size_t, size_t >, int64_t >> &  cluster_pairs,
vector< pair< multipath_alignment_t, multipath_alignment_t >> &  rescued_multipath_aln_pairs,
vector< pair< pair< size_t, size_t >, int64_t >> &  rescued_cluster_pairs,
vector< double > &  rescued_multiplicities 
) const
protected

Merge the rescued mappings into the output vector and deduplicate pairs.

◆ multipath_align()

void vg::MultipathMapper::multipath_align ( const Alignment &  alignment,
const bdsg::HashGraph *  graph,
memcluster_t &  graph_mems,
multipath_alignment_t &  multipath_aln_out 
) const
protected

Make a multipath alignment of the read against the indicated graph and add it to the list of multimappings. Does NOT necessarily produce a multipath_alignment_t in topological order.

◆ multipath_map()

void vg::MultipathMapper::multipath_map ( const Alignment &  alignment,
vector< multipath_alignment_t > &  multipath_alns_out 
)

Map read in alignment to graph and make multipath alignments.

◆ multipath_map_internal()

void vg::MultipathMapper::multipath_map_internal ( const Alignment alignment,
MappingQualityMethod  mapq_method,
vector< multipath_alignment_t > &  multipath_alns_out 
)
protected

Wrapped internal function that allows some code paths to circumvent the current mapping quality method option.

◆ multipath_map_paired()

void vg::MultipathMapper::multipath_map_paired ( const Alignment &  alignment1,
const Alignment &  alignment2,
vector< pair< multipath_alignment_t, multipath_alignment_t >> &  multipath_aln_pairs_out,
vector< pair< Alignment, Alignment >> &  ambiguous_pair_buffer 
)

Map a paired read to the graph and make paired multipath alignments. Assumes reads are on the same strand of the DNA/RNA molecule. If the fragment length distribution is still being estimated and the pair cannot be mapped unambiguously, adds the reads to a buffer for ambiguous pairs and does not output any multipath alignments.

◆ prob_equivalent_clusters_hits_missed()

double vg::MultipathMapper::prob_equivalent_clusters_hits_missed ( const memcluster_t cluster) const
protected

Estimates the probability that a cluster with the same hits would have been missed because of subsampling high-count SMEMs.

◆ pseudo_length()

size_t vg::MultipathMapper::pseudo_length ( const multipath_alignment_t multipath_aln) const
protected

A scaling of a score so that it approximately follows the distribution of the longest match in the p-value test.

◆ query_cluster_graphs()

auto vg::MultipathMapper::query_cluster_graphs ( const Alignment alignment,
const vector< MaximalExactMatch > &  mems,
const vector< memcluster_t > &  clusters 
)
protected

Extracts a subgraph around each cluster of MEMs that encompasses any graph position reachable (according to the Mapper's aligner) with local alignment anchored at the MEMs. If any subgraphs overlap, they are merged into one subgraph. Returns a vector of all the merged cluster subgraphs, their MEMs assigned from the mems vector according to the MEMs' hits, and their read coverages in bp. The caller must delete the VG objects produced!

◆ random_match_p_value()

double vg::MultipathMapper::random_match_p_value ( size_t  match_length,
size_t  read_length 
)
protected

The approximate p-value for a match length of the given size against the current graph.

◆ read_coverage()

int64_t vg::MultipathMapper::read_coverage ( const memcluster_t mem_hits)
staticprotected

Computes the number of read bases a cluster of MEM hits covers.

◆ read_coverage_z_score()

double vg::MultipathMapper::read_coverage_z_score ( int64_t  coverage,
const Alignment alignment 
) const
protected

Computes the Z-score of the number of matches against an equal length random DNA string.

◆ reduce_to_single_path()

void vg::MultipathMapper::reduce_to_single_path ( const multipath_alignment_t &  multipath_aln,
vector< Alignment > &  alns_out,
size_t  max_number 
) const

Given a mapped multipath_alignment_t, reduce it to up to max_number + 1 nonoverlapping single path alignments, with mapping qualities accounting for positional uncertainty between them. Even if the read is unmapped, there will always be at least one (possibly score 0) output alignment.

◆ set_automatic_min_clustering_length()

void vg::MultipathMapper::set_automatic_min_clustering_length ( double  random_mem_probability = 0.5)

Sets the minimum clustering MEM length to the approximate length that a MEM would have to be to have at most the given probability of occurring in a random sequence of the same size as the graph.

◆ share_terminal_positions()

bool vg::MultipathMapper::share_terminal_positions ( const multipath_alignment_t multipath_aln_1,
const multipath_alignment_t multipath_aln_2 
) const
protected

Return true if any of the initial positions of the source Subpaths are shared between the two multipath alignments.

◆ sort_and_compute_mapping_quality() [1/2]

void vg::MultipathMapper::sort_and_compute_mapping_quality ( vector< multipath_alignment_t > &  multipath_alns,
MappingQualityMethod  mapq_method,
vector< size_t > *  cluster_idxs = nullptr 
) const
protected

Sorts mappings by score and stores the mapping quality of the optimal alignment in the multipath_alignment_t object. Optionally also sorts a vector of indexes to keep track of the cluster-of-origin. Allows multipath alignments where the best single path alignment is leaving the read unmapped. multipath_alignment_ts MUST be topologically sorted.

Get all the linearizations we are going to work with, possibly with duplicates. The first alignment will be optimal.

◆ sort_and_compute_mapping_quality() [2/2]

void vg::MultipathMapper::sort_and_compute_mapping_quality ( vector< pair< multipath_alignment_t, multipath_alignment_t >> &  multipath_aln_pairs,
vector< pair< pair< size_t, size_t >, int64_t >> &  cluster_pairs,
vector< pair< size_t, size_t >> *  duplicate_pairs_out = nullptr,
vector< double > *  pair_multiplicities = nullptr 
) const
protected

Sorts mappings by score and stores the mapping quality of the optimal alignment in the multipath_alignment_t object. If there are ties between scores, breaks them by the expected distance between pairs as computed by the OrientedDistanceClusterer::cluster_pairs function (modified cluster_pairs vector). Allows multipath alignments where the best single path alignment is leaving the read unmapped. multipath_alignment_ts MUST be topologically sorted. Optionally considers non-present duplicates of the scores encoded as multiplicities.

◆ split_multicomponent_alignments() [1/2]

void vg::MultipathMapper::split_multicomponent_alignments ( vector< multipath_alignment_t > &  multipath_alns_out,
vector< size_t > *  cluster_idxs = nullptr 
) const
protected

If there are any multipath_alignment_ts with multiple connected components, split them up and add them to the return vector. Properly handles multipath_alignment_ts that are unmapped. Does not depend on or guarantee topological order in the multipath_alignment_ts.

◆ split_multicomponent_alignments() [2/2]

void vg::MultipathMapper::split_multicomponent_alignments ( vector< pair< multipath_alignment_t, multipath_alignment_t >> &  multipath_aln_pairs_out,
vector< pair< pair< size_t, size_t >, int64_t >> &  cluster_pairs 
) const
protected

If there are any multipath_alignment_ts with multiple connected components, split them up and add them to the return vector. Also measures the distance between them and adds a record to the cluster pairs vector. Properly handles multipath_alignment_ts that are unmapped. Does not depend on or guarantee topological order in the multipath_alignment_ts.

◆ strip_full_length_bonuses()

void vg::MultipathMapper::strip_full_length_bonuses ( multipath_alignment_t multipath_aln) const
protected

Remove the full length bonus from all source or sink subpaths that received it.

Member Data Documentation

◆ alt_anchor_max_length_diff

size_t vg::MultipathMapper::alt_anchor_max_length_diff = 5

◆ always_check_population

bool vg::MultipathMapper::always_check_population = false

◆ band_padding_memo

vector<size_t> vg::MultipathMapper::band_padding_memo
protected

◆ band_padding_memo_size

size_t vg::MultipathMapper::band_padding_memo_size = 2000

◆ band_padding_multiplier

double vg::MultipathMapper::band_padding_multiplier = 1.0

◆ component_min_dist

bool vg::MultipathMapper::component_min_dist = false

◆ distance_index

MinimumDistanceIndex* vg::MultipathMapper::distance_index
protected

◆ dynamic_max_alt_alns

bool vg::MultipathMapper::dynamic_max_alt_alns = false

◆ force_haplotype_count

size_t vg::MultipathMapper::force_haplotype_count = 0

◆ fragment_length_warning_factor

size_t vg::MultipathMapper::fragment_length_warning_factor = 0

◆ greedy_min_dist

bool vg::MultipathMapper::greedy_min_dist = false

◆ log_likelihood_approx_factor

double vg::MultipathMapper::log_likelihood_approx_factor = 1.0

◆ mapq_scaling_factor

double vg::MultipathMapper::mapq_scaling_factor = 1.0

◆ max_alignment_gap

size_t vg::MultipathMapper::max_alignment_gap = 5000

◆ max_alt_mappings

size_t vg::MultipathMapper::max_alt_mappings = 1

◆ max_branch_trim_length

size_t vg::MultipathMapper::max_branch_trim_length = 1

◆ max_expected_dist_approx_error

size_t vg::MultipathMapper::max_expected_dist_approx_error = 8

◆ max_exponential_rate_intercept

double vg::MultipathMapper::max_exponential_rate_intercept = 0.7612

◆ max_exponential_rate_slope

double vg::MultipathMapper::max_exponential_rate_slope = 0.0001496

◆ max_exponential_shape_intercept

double vg::MultipathMapper::max_exponential_shape_intercept = 12.37

◆ max_exponential_shape_slope

double vg::MultipathMapper::max_exponential_shape_slope = 0.007191

◆ max_fanout_base_quality

int vg::MultipathMapper::max_fanout_base_quality = 20

◆ max_fans_out

int vg::MultipathMapper::max_fans_out = 5

◆ max_mapping_p_value

double vg::MultipathMapper::max_mapping_p_value = 0.00001

◆ max_p_value_memo_size

size_t vg::MultipathMapper::max_p_value_memo_size = 500

◆ max_rescue_attempts

size_t vg::MultipathMapper::max_rescue_attempts = 32

◆ max_single_end_mappings_for_rescue

size_t vg::MultipathMapper::max_single_end_mappings_for_rescue = 64

◆ max_snarl_cut_size

int64_t vg::MultipathMapper::max_snarl_cut_size = 5

◆ max_suboptimal_path_score_ratio

double vg::MultipathMapper::max_suboptimal_path_score_ratio = 2.0

◆ mem_coverage_min_ratio

double vg::MultipathMapper::mem_coverage_min_ratio = 0.5

◆ min_clustering_mem_length

size_t vg::MultipathMapper::min_clustering_mem_length = 0

◆ min_median_mem_coverage_for_split

size_t vg::MultipathMapper::min_median_mem_coverage_for_split = 0

◆ min_tail_anchor_length

size_t vg::MultipathMapper::min_tail_anchor_length = 3

◆ no_clustering

bool vg::MultipathMapper::no_clustering = false

◆ num_alt_alns

int32_t vg::MultipathMapper::num_alt_alns = 4

◆ num_mapping_attempts

size_t vg::MultipathMapper::num_mapping_attempts = 48

◆ order_length_repeat_hit_max

size_t vg::MultipathMapper::order_length_repeat_hit_max = 0

◆ p_value_memo

thread_local unordered_map< pair< size_t, size_t >, double > vg::MultipathMapper::p_value_memo
staticprotected

◆ path_component_index

PathComponentIndex vg::MultipathMapper::path_component_index
protected

◆ pessimistic_tail_gap_multiplier

double vg::MultipathMapper::pessimistic_tail_gap_multiplier = 0.0

◆ plausible_rescue_cluster_coverage_diff

size_t vg::MultipathMapper::plausible_rescue_cluster_coverage_diff = 5

◆ population_max_paths

size_t vg::MultipathMapper::population_max_paths = 10

◆ population_paths_hard_cap

size_t vg::MultipathMapper::population_paths_hard_cap = 1000

◆ recombination_penalty

double vg::MultipathMapper::recombination_penalty = 20.7

◆ report_group_mapq

bool vg::MultipathMapper::report_group_mapq = false

◆ rescue_only_anchor_max

size_t vg::MultipathMapper::rescue_only_anchor_max = 16

◆ rescue_only_min

size_t vg::MultipathMapper::rescue_only_min = 128

◆ reversing_walk_length

size_t vg::MultipathMapper::reversing_walk_length = 0

◆ rr_memos

thread_local unordered_map< pair< double, size_t >, haploMath::RRMemo > vg::MultipathMapper::rr_memos
staticprotected

Memos used by population model.

◆ secondary_rescue_attempts

size_t vg::MultipathMapper::secondary_rescue_attempts = 4

◆ secondary_rescue_score_diff

double vg::MultipathMapper::secondary_rescue_score_diff = 1.0

◆ secondary_rescue_subopt_diff

int32_t vg::MultipathMapper::secondary_rescue_subopt_diff = 10

◆ simplify_topologies

bool vg::MultipathMapper::simplify_topologies = false

◆ snarl_manager

SnarlManager* vg::MultipathMapper::snarl_manager
protected

◆ stripped_match_alg_max_length

size_t vg::MultipathMapper::stripped_match_alg_max_length = 0

◆ stripped_match_alg_strip_length

size_t vg::MultipathMapper::stripped_match_alg_strip_length = 16

◆ stripped_match_alg_target_count

size_t vg::MultipathMapper::stripped_match_alg_target_count = 5

◆ suppress_cluster_merging

bool vg::MultipathMapper::suppress_cluster_merging = false

◆ suppress_mismapping_detection

bool vg::MultipathMapper::suppress_mismapping_detection = false

◆ suppress_p_value_memoization

bool vg::MultipathMapper::suppress_p_value_memoization = false

◆ suppress_tail_anchors

bool vg::MultipathMapper::suppress_tail_anchors = false

◆ top_tracebacks

bool vg::MultipathMapper::top_tracebacks = false

◆ use_fanout_match_alg

bool vg::MultipathMapper::use_fanout_match_alg = false

◆ use_min_dist_clusterer

bool vg::MultipathMapper::use_min_dist_clusterer = false

◆ use_population_mapqs

bool vg::MultipathMapper::use_population_mapqs = false

◆ use_stripped_match_alg

bool vg::MultipathMapper::use_stripped_match_alg = false

◆ use_tvs_clusterer

bool vg::MultipathMapper::use_tvs_clusterer = false

◆ use_weibull_calibration

bool vg::MultipathMapper::use_weibull_calibration = false

◆ weibull_offset_intercept

double vg::MultipathMapper::weibull_offset_intercept = 2.342

◆ weibull_offset_slope

double vg::MultipathMapper::weibull_offset_slope = 0.07168

◆ weibull_scale_intercept

double vg::MultipathMapper::weibull_scale_intercept = 1.05

◆ weibull_scale_slope

double vg::MultipathMapper::weibull_scale_slope = 0.0601

◆ weibull_shape_intercept

double vg::MultipathMapper::weibull_shape_intercept = -0.176

◆ weibull_shape_slope

double vg::MultipathMapper::weibull_shape_slope = 0.199

The documentation for this class was generated from the following files: