vg
tools for working with variation graphs
Classes | Typedefs | Functions
vg::algorithms Namespace Reference

Classes

struct  GFAFormatError
 This exception will be thrown if the GFA data is not acceptable. More...
 
struct  kmer_t
 Stores a kmer in the context of a graph. More...
 
struct  walk_t
 Stores a walk in the context of a graph. More...
 

Typedefs

using BinnedDepthIndex = unordered_map< string, map< size_t, map< size_t, pair< float, float > >> >
 

Functions

template<class DistHeuristic >
vector< handle_ta_star (const HandleGraph *graph, const pos_t &pos_1, const pos_t &pos_2, const DistHeuristic &dist_heuristic, bool find_min=true, int64_t extremal_distance=numeric_limits< int64_t >::max(), bool monotonic_heuristic=true)
 
unordered_map< path_handle_t, vector< pair< size_t, bool > > > alignment_path_offsets (const PathPositionHandleGraph &graph, const Alignment &aln, bool just_min, bool nearby, size_t search_limit)
 
unordered_map< path_handle_t, vector< pair< size_t, bool > > > multipath_alignment_path_offsets (const PathPositionHandleGraph &graph, const multipath_alignment_t &mp_aln)
 
void annotate_with_initial_path_positions (const PathPositionHandleGraph &graph, Alignment &aln, size_t search_limit)
 
void annotate_with_node_path_positions (const PathPositionHandleGraph &graph, Alignment &aln, size_t search_limit)
 
void annotate_with_path_positions (const PathPositionHandleGraph &graph, Alignment &aln, bool just_min, size_t search_limit)
 
void annotate_with_initial_path_positions (const PathPositionHandleGraph &graph, vector< Alignment > &alns, size_t search_limit)
 
void append_handle_graph (const HandleGraph *from, MutableHandleGraph *into)
 Append the heads of from to the tails of into. More...
 
void append_path_handle_graph (const PathHandleGraph *from, MutablePathMutableHandleGraph *into, bool only_connect_path_tips)
 
unordered_set< id_tapply_orientations (MutableHandleGraph *graph, const vector< handle_t > &orientations)
 
size_t min_approx_path_distance (const PathPositionHandleGraph *graph, const pos_t &pos1, const pos_t &pos2, uint64_t max_search)
 use the embedded paths to get an estimated minimum distance between the positions More...
 
bool are_equivalent (const HandleGraph *graph_1, const HandleGraph *graph_2, bool verbose)
 
bool are_equivalent_with_paths (const PathHandleGraph *graph_1, const PathHandleGraph *graph_2, bool verbose)
 
void copy_handle_graph (const HandleGraph *from, MutableHandleGraph *into)
 Copies the nodes and edges from one graph into another. More...
 
void copy_path_handle_graph (const PathHandleGraph *from, MutablePathMutableHandleGraph *into)
 Copies the nodes, edges, and paths from one graph into another. More...
 
void copy_path (const PathHandleGraph *from, const path_handle_t &path, MutablePathHandleGraph *into)
 
tuple< vector< handle_t >, unordered_map< handle_t, size_t >, bool > count_walks_through_nodes (const HandleGraph *graph)
 
size_t count_walks (const HandleGraph *graph)
 
void packed_depths (const Packer &packer, const string &path_name, size_t min_coverage, ostream &out_stream)
 
pair< double, double > packed_depth_of_bin (const Packer &packer, step_handle_t start_step, step_handle_t end_plus_one_step, size_t min_coverage, bool include_deletions)
 
vector< tuple< size_t, size_t, double, double > > binned_packed_depth (const Packer &packer, const string &path_name, size_t bin_size, size_t min_coverage, bool include_deletions)
 
BinnedDepthIndex binned_packed_depth_index (const Packer &packer, const vector< string > &path_names, size_t min_bin_size, size_t max_bin_size, double exp_growth_factor, size_t min_coverage, bool include_deletions, bool std_err)
 
pair< float, float > get_depth_from_index (const BinnedDepthIndex &depth_index, const string &path_name, size_t start_offset, size_t end_offset)
 Query index created above. More...
 
pair< double, double > sample_mapping_depth (const HandleGraph &graph, const string &input_filename, size_t max_nodes, size_t random_seed, size_t min_coverage, size_t min_mapq, const string &format)
 
pair< double, double > sample_gam_depth (const HandleGraph &graph, const vector< Alignment > &alignments, size_t max_nodes, size_t random_seed, size_t min_coverage, size_t min_mapq)
 
pair< double, double > sample_mapping_depth (const HandleGraph &graph, const vector< Alignment > &alignments, size_t max_nodes, size_t random_seed, size_t min_coverage, size_t min_mapq)
 As above, but read a vector instead of a stream. More...
 
unordered_map< id_t, id_tdagify (const HandleGraph *graph, MutableHandleGraph *into, size_t min_preserved_path_length)
 
void dfs (const HandleGraph &graph, const function< void(const handle_t &)> &handle_begin_fn, const function< void(const handle_t &)> &handle_end_fn, const function< bool(void)> &break_fn, const function< void(const edge_t &)> &edge_fn, const function< void(const edge_t &)> &tree_fn, const function< void(const edge_t &)> &edge_curr_fn, const function< void(const edge_t &)> &edge_cross_fn, const vector< handle_t > &sources, const unordered_set< handle_t > &sinks)
 
void dfs (const HandleGraph &graph, const function< void(const handle_t &)> &handle_begin_fn, const function< void(const handle_t &)> &handle_end_fn, const vector< handle_t > &sources, const unordered_set< handle_t > &sinks)
 
void dfs (const HandleGraph &graph, const function< void(const handle_t &)> &handle_begin_fn, const function< void(const handle_t &)> &handle_end_fn, const function< bool(void)> &break_fn)
 
bool dijkstra (const HandleGraph *g, handle_t start, function< bool(const handle_t &, size_t)> reached_callback, bool traverse_leftward)
 
bool dijkstra (const HandleGraph *g, const unordered_set< handle_t > &starts, function< bool(const handle_t &, size_t)> reached_callback, bool traverse_leftward)
 
bool is_head_node (handle_t h, const HandleGraph *g)
 
int32_t distance_to_head (handle_t h, int32_t limit, const HandleGraph *graph)
 
int32_t distance_to_head (handle_t h, int32_t limit, int32_t dist, unordered_set< handle_t > &seen, const HandleGraph *graph)
 
bool is_tail_node (handle_t h, const HandleGraph *g)
 
int32_t distance_to_tail (handle_t h, int32_t limit, const HandleGraph *graph)
 
int32_t distance_to_tail (handle_t h, int32_t limit, int32_t dist, unordered_set< handle_t > &seen, const HandleGraph *graph)
 
vector< handle_teades_algorithm (const HandleGraph *graph)
 
void expand_context_by_steps (const HandleGraph *source, MutableHandleGraph *subgraph, int64_t dist, bool expand_forward, bool expand_backward)
 
void expand_context_by_length (const HandleGraph *source, MutableHandleGraph *subgraph, int64_t dist, bool expand_forward, bool expand_backward)
 
void expand_context (const HandleGraph *source, MutableHandleGraph *subgraph, int64_t dist, bool use_steps, bool expand_forward, bool expand_backward)
 
void expand_context_with_paths (const PathHandleGraph *source, MutablePathMutableHandleGraph *subgraph, int64_t dist, bool use_steps, bool expand_forward, bool expand_backward)
 
void extend (const HandleGraph *source, MutableHandleGraph *into)
 
unordered_map< id_t, id_textract_connecting_graph (const HandleGraph *source, DeletableHandleGraph *into, int64_t max_len, pos_t pos_1, pos_t pos_2, bool detect_terminal_cycles, bool only_paths, bool strict_max_len)
 
void extract_containing_graph (const HandleGraph *source, MutableHandleGraph *into, const vector< pos_t > &positions, const vector< size_t > &forward_search_lengths, const vector< size_t > &backward_search_lengths, size_t reversing_walk_length)
 
void extract_containing_graph (const HandleGraph *source, MutableHandleGraph *into, const vector< pos_t > &positions, size_t max_dist, size_t reversing_walk_length)
 
void extract_containing_graph (const HandleGraph *source, MutableHandleGraph *into, const vector< pos_t > &positions, const vector< size_t > &position_max_dist, size_t reversing_walk_length)
 
unordered_map< id_t, id_textract_extending_graph (const HandleGraph *source, DeletableHandleGraph *into, int64_t max_dist, pos_t pos, bool backward, bool preserve_cycles_on_src_node)
 
unordered_map< handle_t, size_t > find_shortest_paths (const HandleGraph *g, handle_t start, bool traverse_leftward)
 
vector< handle_thead_nodes (const HandleGraph *g)
 Find all of the nodes with no edges on their left sides. More...
 
vector< handle_ttail_nodes (const HandleGraph *g)
 Find all of the nodes with no edges on their right sides. More...
 
vector< handle_tfind_tips (const HandleGraph *g)
 
nid_t parse_gfa_sequence_id (const string &str)
 
void validate_gfa_edge (const gfak::edge_elem &e)
 
string process_raw_gfa_path_name (const string &path_name_raw)
 
void gfa_to_handle_graph_in_memory (istream &in, MutableHandleGraph *graph, gfak::GFAKluge &gg)
 
void gfa_to_handle_graph_on_disk (const string &filename, MutableHandleGraph *graph, bool try_id_increment_hint, gfak::GFAKluge &gg)
 
void gfa_to_handle_graph_load_graph (const string &filename, istream *unseekable, MutableHandleGraph *graph, bool try_id_increment_hint, gfak::GFAKluge &gg)
 
void gfa_to_handle_graph_add_paths (const string &filename, istream *unseekable, MutablePathHandleGraph *graph, gfak::GFAKluge &gg)
 
void gfa_to_handle_graph (const string &filename, MutableHandleGraph *graph, bool try_from_disk, bool try_id_increment_hint)
 
void gfa_to_path_handle_graph (const string &filename, MutablePathMutableHandleGraph *graph, bool try_from_disk=true, bool try_id_increment_hint=false)
 Same as gfa_to_handle_graph but also adds path elements from the GFA to the graph. More...
 
void gfa_to_path_handle_graph_in_memory (istream &in, MutablePathMutableHandleGraph *graph)
 
vector< handle_tid_order (const HandleGraph *g)
 
bool is_acyclic (const HandleGraph *graph)
 
bool is_directed_acyclic (const HandleGraph *graph)
 
bool is_single_stranded (const HandleGraph *graph)
 
vector< handle_tsingle_stranded_orientation (const HandleGraph *graph)
 
unordered_set< id_tmake_single_stranded (MutableHandleGraph *graph)
 
vector< pos_tjump_along_closest_path (const PathPositionHandleGraph *graph, const pos_t &pos, int64_t jump_dist, size_t max_search_dist)
 
pair< double, vector< handle_t > > widest_dijkstra (const HandleGraph *g, handle_t source, handle_t sink, function< double(const handle_t &)> node_weight_callback, function< double(const edge_t &)> edge_weight_callback, function< bool(const handle_t &)> is_node_ignored_callback, function< bool(const edge_t &)> is_edge_ignored_callback, bool greedy_avg)
 
vector< pair< double, vector< handle_t > > > yens_k_widest_paths (const HandleGraph *g, handle_t source, handle_t sink, size_t K, function< double(const handle_t &)> node_weight_callback, function< double(const edge_t &)> edge_weight_callback, bool greedy_avg=false)
 Find the k widest paths. More...
 
void for_each_kmer (const HandleGraph &graph, size_t k, size_t edge_max, const std::function< void(const kmer_t &)> &lambda)
 Iterate over all the kmers in the graph, running lambda on each. More...
 
std::ostream & operator<< (std::ostream &out, const kmer_t &kmer)
 Print a kmer_t to a stream. More...
 
void merge (handlegraph::MutablePathDeletableHandleGraph *graph, const vector< pair< handle_t, size_t >> &start, size_t length)
 
unordered_map< path_handle_t, vector< pair< size_t, bool > > > nearest_offsets_in_paths (const PathPositionHandleGraph *graph, const pos_t &pos, int64_t max_search)
 
map< string, vector< pair< size_t, bool > > > offsets_in_paths (const PathPositionHandleGraph *graph, const pos_t &pos)
 
unordered_map< path_handle_t, vector< pair< size_t, bool > > > simple_offsets_in_paths (const PathPositionHandleGraph *graph, pos_t pos)
 A "simple" model for path position getting for debugging. More...
 
map< pos_t, char > next_pos_chars (const PathPositionHandleGraph &graph, pos_t pos)
 
void normalize (handlegraph::MutablePathDeletableHandleGraph *graph, int max_iter, bool debug)
 
void append_mapping_sequence (const Mapping &m, const string &node_seq, string &seq)
 use the given oriented node sequence and the mapping to reconstruct the sequence represented by the mapping More...
 
string path_string (const HandleGraph &graph, const Path &path)
 use the given graph and the path to determine our path string More...
 
void remove_high_degree_nodes (DeletableHandleGraph &g, int max_degree)
 
unordered_map< id_t, pair< id_t, bool > > reverse_complement_graph (const HandleGraph *source, MutableHandleGraph *into)
 
size_t bellman_ford_shortest_cycle_length (const HandleGraph *graph, const handle_t &source, const vector< handle_t > &layout, const unordered_map< handle_t, size_t > &handle_index, const vector< pair< size_t, size_t >> &feedback_edges)
 
size_t dijkstra_shortest_cycle_length (const HandleGraph *graph, const handle_t &source)
 Simple Dijkstra implementation that computes shortest cycle. More...
 
size_t shortest_cycle_length_internal (const HandleGraph *graph, const handle_t &source, const vector< handle_t > &layout, const unordered_map< handle_t, size_t > &handle_index, const vector< pair< size_t, size_t >> &feedback_edges)
 
size_t shortest_cycle_length (const HandleGraph *graph, const handle_t &source)
 
size_t shortest_cycle_length (const HandleGraph *graph)
 
bool simplify_siblings (handlegraph::MutablePathDeletableHandleGraph *graph)
 
vector< pair< id_t, id_t > > sorted_id_ranges (const HandleGraph *graph)
 Get a sorted list of inclusive ranges of IDs used in the given HandleGraph. More...
 
unordered_map< id_t, pair< id_t, bool > > split_strands (const HandleGraph *source, MutableHandleGraph *into)
 
vector< unordered_set< id_t > > strongly_connected_components (const HandleGraph *g)
 Identify strongly connected components. More...
 
void expand_subgraph_by_steps (const HandleGraph &source, MutableHandleGraph &subgraph, const uint64_t &steps, bool forward_only=false)
 expand the subgraph iteratively for this many steps More...
 
void expand_subgraph_to_node_count (const HandleGraph &source, MutableHandleGraph &subgraph, const uint64_t &node_count, bool forward_only=false)
 expand the subgraph iteratively until its node count is at least node_count More...
 
void expand_subgraph_by_length (const HandleGraph &source, MutableHandleGraph &subgraph, const uint64_t &length, bool forward_only=false)
 expand the subgraph iteratively to include at least length new sequence More...
 
void expand_subgraph_to_length (const HandleGraph &source, MutableHandleGraph &subgraph, const uint64_t &length, bool forward_only=false)
 expand the subgraph iterativel until its total sequence length is greater than length More...
 
void extract_context (const HandleGraph &source, MutableHandleGraph &subgraph, const handle_t &handle, const uint64_t &offset, const uint64_t &length, bool fwd, bool rev)
 expand the context around a single handle position More...
 
void extract_id_range (const HandleGraph &source, const nid_t &id1, const nid_t &id2, MutableHandleGraph &subgraph)
 extract the node id range More...
 
void extract_path_range (const PathPositionHandleGraph &source, path_handle_t path_handle, int64_t start, int64_t end, MutableHandleGraph &subgraph)
 
void add_subpaths_to_subgraph (const PathPositionHandleGraph &source, MutablePathHandleGraph &subgraph, bool subpath_naming)
 
void add_connecting_edges_to_subgraph (const HandleGraph &source, MutableHandleGraph &subgraph)
 
void three_edge_connected_component_merges_dense (size_t node_count, size_t first_root, const function< void(size_t, const function< void(size_t)> &)> &for_each_connected_node, const function< void(size_t, size_t)> &same_component)
 
void three_edge_connected_components_dense (size_t node_count, size_t first_root, const function< void(size_t, const function< void(size_t)> &)> &for_each_connected_node, const function< void(const function< void(const function< void(size_t)> &)> &)> &component_callback)
 
void three_edge_connected_components_dense_cactus (size_t node_count, const function< void(size_t, const function< void(size_t)> &)> &for_each_connected_node, const function< void(const function< void(const function< void(size_t)> &)> &)> &component_callback)
 
template<typename TECCNode >
void three_edge_connected_components (const function< void(const function< void(TECCNode)> &)> &for_each_node, const function< void(TECCNode, const function< void(TECCNode)> &)> &for_each_connected_node, const function< void(const function< void(const function< void(TECCNode)> &)> &)> &component_callback)
 
template<typename TECCNode >
void three_edge_connected_component_merges (const function< void(const function< void(TECCNode)> &)> &for_each_node, const function< void(TECCNode, const function< void(TECCNode)> &)> &for_each_connected_node, const function< void(TECCNode, TECCNode)> &same_component)
 
void to_gfa (const PathHandleGraph &graph, ostream &out)
 
vector< handle_ttopological_order (const HandleGraph *g)
 
vector< handle_tlazy_topological_order_internal (const HandleGraph *g, bool lazier)
 
vector< handle_tlazy_topological_order (const HandleGraph *g)
 
vector< handle_tlazier_topological_order (const HandleGraph *g)
 
unordered_set< id_torient_nodes_forward (MutableHandleGraph *g)
 
void unchop (handlegraph::MutablePathDeletableHandleGraph *graph)
 
void chop (handlegraph::MutableHandleGraph *graph, size_t max_node_length)
 
void for_each_walk (const HandleGraph &graph, size_t k, size_t edge_max, const std::function< void(const walk_t &)> &lambda)
 Iterate over all the walks in the graph, running lambda on each. More...
 
std::ostream & operator<< (std::ostream &out, const walk_t &walk)
 Print a walk_t to a stream. More...
 
uint64_t walk_haplotype_frequency (const HandleGraph &graph, const gbwt::GBWT &haplotypes, const walk_t &walk)
 
vector< unordered_set< id_t > > weakly_connected_components (const HandleGraph *graph)
 
vector< pair< unordered_set< id_t >, vector< handle_t > > > weakly_connected_components_with_tips (const HandleGraph *graph)
 
bool is_weakly_connected (const HandleGraph *graph)
 

Typedef Documentation

◆ BinnedDepthIndex

using vg::algorithms::BinnedDepthIndex = typedef unordered_map<string, map<size_t, map<size_t, pair<float, float> >> >

Use the above function to retrieve the binned depths of a list of paths, and store them indexed by start coordinate. If std_err is true, store <mean, stderr> instead of <mean, variance> For each path, a series of indexes is computed, for bin sizes from min_bin_size, min_bin_size^(exp_growth_factor), etc.

Function Documentation

◆ a_star()

template<class DistHeuristic >
vector< handle_t > vg::algorithms::a_star ( const HandleGraph graph,
const pos_t pos_1,
const pos_t pos_2,
const DistHeuristic &  dist_heuristic,
bool  find_min = true,
int64_t  extremal_distance = numeric_limits<int64_t>::max(),
bool  monotonic_heuristic = true 
)

Implements the A* heuristic-guided search algorithm. Returns the path from pos_1 to pos_2 that is either minimal or maximal length, according to the parameters. Allows an extremal distance beyond which the algorithm will cease looking for paths (this should be a large value when looking for minimal paths and a small value when looking for maximum paths). If there is no path between the positions, or none within the extremal length, an empty vector will be returned.

◆ add_connecting_edges_to_subgraph()

void vg::algorithms::add_connecting_edges_to_subgraph ( const HandleGraph source,
MutableHandleGraph subgraph 
)

We can accumulate a subgraph without accumulating all the edges between its nodes this helper ensures that we get the full set

◆ add_subpaths_to_subgraph()

void vg::algorithms::add_subpaths_to_subgraph ( const PathPositionHandleGraph source,
MutablePathHandleGraph subgraph,
bool  subpath_naming 
)

add subpaths to the subgraph, providing a concatenation of subpaths that are discontiguous over the subgraph based on their order in the path position index provided by the source graph will clear any path found in both graphs before writing the new steps into it if subpath_naming is true, a suffix will be added to each path in the subgraph denoting its offset in the source graph (unless the subpath was not cut up at all)

◆ alignment_path_offsets()

unordered_map< path_handle_t, vector< pair< size_t, bool > > > vg::algorithms::alignment_path_offsets ( const PathPositionHandleGraph graph,
const Alignment aln,
bool  just_min,
bool  nearby,
size_t  search_limit = 0 
)

Gives the path positions of the alignment. If just_min is set, gives the minimum position on each path. Else, gives all Mapping start positions on each path. If nearby is set, will search for a nearby path. Will recurse with nearby set if it is not set on initial call and no positions are found. Respects search_limit in bp in that case. If search_limit is 0, read length is used.

◆ annotate_with_initial_path_positions() [1/2]

void vg::algorithms::annotate_with_initial_path_positions ( const PathPositionHandleGraph graph,
Alignment aln,
size_t  search_limit = 0 
)

Use the graph to annotate an Alignment with the first position it touches on each reference path. Thread safe.

search_limit gives the maximum distance to search for a path if the alignment does not actually touch any paths. If 0, the alignment's sequence length is used.

◆ annotate_with_initial_path_positions() [2/2]

void vg::algorithms::annotate_with_initial_path_positions ( const PathPositionHandleGraph graph,
vector< Alignment > &  aln,
size_t  search_limit = 0 
)

Use the graph annotate Alignments with the first position they touch on each reference path. Thread safe.

search_limit gives the maximum distance to search for a path if the alignment does not actually touch any paths. If 0, the alignment's sequence length is used.

◆ annotate_with_node_path_positions()

void vg::algorithms::annotate_with_node_path_positions ( const PathPositionHandleGraph graph,
Alignment aln,
size_t  search_limit = 0 
)

Use the graph to annotate an Alignment with the first position it touches on each node it visits in each reference path. Thread safe. If no nodes on any path are visited, searches for a nearby path position to use.

search_limit gives the maximum distance to search for a path if the alignment does not actually touch any paths. If 0, the alignment's sequence length is used.

◆ annotate_with_path_positions()

void vg::algorithms::annotate_with_path_positions ( const PathPositionHandleGraph graph,
Alignment aln,
bool  just_min,
size_t  search_limit = 0 
)

Use the graph to annotate an Alignment with positions on each reference path. Thread safe.

If just_min is set, gives the minimum position on each path. Else, gives all Mapping start positions on each path. If no positions on the path are found, looks for nearby path positions in graph space. Respects search_limit in bp in that case. If search_limit is 0, read length is used.

◆ append_handle_graph()

void vg::algorithms::append_handle_graph ( const HandleGraph from,
MutableHandleGraph into 
)

Append the heads of from to the tails of into.

◆ append_mapping_sequence()

void vg::algorithms::append_mapping_sequence ( const Mapping m,
const string &  node_seq,
string &  seq 
)

use the given oriented node sequence and the mapping to reconstruct the sequence represented by the mapping

◆ append_path_handle_graph()

void vg::algorithms::append_path_handle_graph ( const PathHandleGraph from,
MutablePathMutableHandleGraph into,
bool  only_connect_path_tips = false 
)

Append the heads of from to the tails of into Append all (shared) paths of from to into, copy the rest. If only_connect_path_tips is true, then only edges linking appended paths will be added (as opposed to every head and tail)

◆ apply_orientations()

unordered_set< id_t > vg::algorithms::apply_orientations ( MutableHandleGraph graph,
const vector< handle_t > &  orientations 
)

Modifies underlying graph so that any node whose handle is given in the reverse orientation is flipped so that all locally forward orientations match the orientation of the provided handles. Returns a set of IDs for nodes that were flipped. Invalid if vector contains multiple handles to the same node. May change the ordering of the underlying graph.

◆ are_equivalent()

bool vg::algorithms::are_equivalent ( const HandleGraph graph_1,
const HandleGraph graph_2,
bool  verbose = false 
)

Checks whether two graphs are identical in their IDs, sequences, and edges. Optionally reports why graphs are found non-equivalent to stderr.

◆ are_equivalent_with_paths()

bool vg::algorithms::are_equivalent_with_paths ( const PathHandleGraph graph_1,
const PathHandleGraph graph_2,
bool  verbose = false 
)

Checks whether two graphs are identical in their IDs, sequences, edges, and paths. Optionally reports why graphs are found non-equivalent to stderr.

◆ bellman_ford_shortest_cycle_length()

size_t vg::algorithms::bellman_ford_shortest_cycle_length ( const HandleGraph graph,
const handle_t source,
const vector< handle_t > &  layout,
const unordered_map< handle_t, size_t > &  handle_index,
const vector< pair< size_t, size_t >> &  feedback_edges 
)

An implementation of Bellman-Ford with Yen's ordering improvement applied to a layout ideally has a small feedback arc set

◆ binned_packed_depth()

vector< tuple< size_t, size_t, double, double > > vg::algorithms::binned_packed_depth ( const Packer packer,
const string &  path_name,
size_t  bin_size,
size_t  min_coverage,
bool  include_deletions 
)

Use all available threads to estimate the binned packed coverage of a path using above fucntion Each element is a bin's 0-based open-ended interval in the path, and its coverage mean,variance.

◆ binned_packed_depth_index()

BinnedDepthIndex vg::algorithms::binned_packed_depth_index ( const Packer packer,
const vector< string > &  path_names,
size_t  min_bin_size,
size_t  max_bin_size,
double  exp_growth_factor,
size_t  min_coverage,
bool  include_deletions,
bool  std_err 
)

◆ chop()

void vg::algorithms::chop ( handlegraph::MutableHandleGraph graph,
size_t  max_node_length 
)

Chop the graph so nodes are at most max_node_length

◆ copy_handle_graph()

void vg::algorithms::copy_handle_graph ( const HandleGraph from,
MutableHandleGraph into 
)

Copies the nodes and edges from one graph into another.

◆ copy_path()

void vg::algorithms::copy_path ( const PathHandleGraph from,
const path_handle_t path,
MutablePathHandleGraph into 
)

Copies a path from one graph to another. Nodes and edges to support the path must already exist.

◆ copy_path_handle_graph()

void vg::algorithms::copy_path_handle_graph ( const PathHandleGraph from,
MutablePathMutableHandleGraph into 
)

Copies the nodes, edges, and paths from one graph into another.

◆ count_walks()

size_t vg::algorithms::count_walks ( const HandleGraph graph)

Returns the number of source-to-sink walks through the graph Returns numeric_limits<size_t>::max() if the actual number of walks is larger than this.

◆ count_walks_through_nodes()

tuple< vector< handle_t >, unordered_map< handle_t, size_t >, bool > vg::algorithms::count_walks_through_nodes ( const HandleGraph graph)

Returns the count map through each snarl in a graph. Assumes that the graph is a single-stranded DAG. Consider checking these properties with algorithms::is_single_stranded and algorithms::is_directed_acyclic for safety.

◆ dagify()

unordered_map< id_t, id_t > vg::algorithms::dagify ( const HandleGraph graph,
MutableHandleGraph into,
size_t  min_preserved_path_length 
)

◆ dfs() [1/3]

void vg::algorithms::dfs ( const HandleGraph graph,
const function< void(const handle_t &)> &  handle_begin_fn,
const function< void(const handle_t &)> &  handle_end_fn,
const function< bool(void)> &  break_fn 
)

◆ dfs() [2/3]

void vg::algorithms::dfs ( const HandleGraph graph,
const function< void(const handle_t &)> &  handle_begin_fn,
const function< void(const handle_t &)> &  handle_end_fn,
const function< bool(void)> &  break_fn,
const function< void(const edge_t &)> &  edge_fn,
const function< void(const edge_t &)> &  tree_fn,
const function< void(const edge_t &)> &  edge_curr_fn,
const function< void(const edge_t &)> &  edge_cross_fn,
const vector< handle_t > &  sources,
const unordered_set< handle_t > &  sinks 
)

◆ dfs() [3/3]

void vg::algorithms::dfs ( const HandleGraph graph,
const function< void(const handle_t &)> &  handle_begin_fn,
const function< void(const handle_t &)> &  handle_end_fn,
const vector< handle_t > &  sources,
const unordered_set< handle_t > &  sinks 
)

◆ dijkstra() [1/2]

bool vg::algorithms::dijkstra ( const HandleGraph g,
const unordered_set< handle_t > &  starts,
function< bool(const handle_t &, size_t)>  reached_callback,
bool  traverse_leftward = false 
)

Same as the single-start version, except allows starting from multiple handles, all at distance 0.

◆ dijkstra() [2/2]

bool vg::algorithms::dijkstra ( const HandleGraph g,
handle_t  start,
function< bool(const handle_t &, size_t)>  reached_callback,
bool  traverse_leftward = false 
)

Walk out from the given handle and visit all reachable handles (including the start) in the graph once, in closest-first order, accounting for sequence lengths. Walks right unless traverse_leftward is specified, in which case it walks left. Distances are measured between the outgoing side of the start node and the incoming side of the target.

When the shortest distance to a handle has been determined, calls reached_callback with that handle and the distance to it. Calls to reached_callback will occur in ascending order of distance. The reached_callback function must return true to continue the search, and false to abort it early.

Returns true if the search terminated normally, and false if it was aborted.

◆ dijkstra_shortest_cycle_length()

size_t vg::algorithms::dijkstra_shortest_cycle_length ( const HandleGraph graph,
const handle_t source 
)

Simple Dijkstra implementation that computes shortest cycle.

◆ distance_to_head() [1/2]

int32_t vg::algorithms::distance_to_head ( handle_t  h,
int32_t  limit,
const HandleGraph graph 
)

◆ distance_to_head() [2/2]

int32_t vg::algorithms::distance_to_head ( handle_t  h,
int32_t  limit,
int32_t  dist,
unordered_set< handle_t > &  seen,
const HandleGraph graph 
)

Get the distance in bases from start of node to start of closest head node of graph, or -1 if that distance exceeds the limit. dist increases by the number of bases of each previous node until you reach the head node seen is a set that holds the nodes that you have already gotten the distance of, but starts off empty

◆ distance_to_tail() [1/2]

int32_t vg::algorithms::distance_to_tail ( handle_t  h,
int32_t  limit,
const HandleGraph graph 
)

◆ distance_to_tail() [2/2]

int32_t vg::algorithms::distance_to_tail ( handle_t  h,
int32_t  limit,
int32_t  dist,
unordered_set< handle_t > &  seen,
const HandleGraph graph 
)

Get the distance in bases from end of node to end of closest tail node of graph, or -1 if that distance exceeds the limit. dist increases by the number of bases of each previous node until you reach the head node seen is a set that holds the nodes that you have already gotten the distance of, but starts off empty

◆ eades_algorithm()

vector< handle_t > vg::algorithms::eades_algorithm ( const HandleGraph graph)

Returns a layout of handles that has a small number of edges that point backward along the layout (i.e. feedback arcs). Only valid for graphs that have a single stranded orientation. Consider checking this property with algorithms::single_stranded_orientation.

◆ expand_context()

void vg::algorithms::expand_context ( const HandleGraph source,
MutableHandleGraph subgraph,
int64_t  dist,
bool  use_steps,
bool  expand_forward,
bool  expand_backward 
)

◆ expand_context_by_length()

void vg::algorithms::expand_context_by_length ( const HandleGraph source,
MutableHandleGraph subgraph,
int64_t  dist,
bool  expand_forward,
bool  expand_backward 
)

◆ expand_context_by_steps()

void vg::algorithms::expand_context_by_steps ( const HandleGraph source,
MutableHandleGraph subgraph,
int64_t  dist,
bool  expand_forward,
bool  expand_backward 
)

◆ expand_context_with_paths()

void vg::algorithms::expand_context_with_paths ( const PathHandleGraph source,
MutablePathMutableHandleGraph subgraph,
int64_t  dist,
bool  use_steps,
bool  expand_forward,
bool  expand_backward 
)

◆ expand_subgraph_by_length()

void vg::algorithms::expand_subgraph_by_length ( const HandleGraph source,
MutableHandleGraph subgraph,
const uint64_t &  length,
bool  forward_only 
)

expand the subgraph iteratively to include at least length new sequence

◆ expand_subgraph_by_steps()

void vg::algorithms::expand_subgraph_by_steps ( const HandleGraph source,
MutableHandleGraph subgraph,
const uint64_t &  steps,
bool  forward_only 
)

expand the subgraph iteratively for this many steps

◆ expand_subgraph_to_length()

void vg::algorithms::expand_subgraph_to_length ( const HandleGraph source,
MutableHandleGraph subgraph,
const uint64_t &  length,
bool  forward_only 
)

expand the subgraph iterativel until its total sequence length is greater than length

◆ expand_subgraph_to_node_count()

void vg::algorithms::expand_subgraph_to_node_count ( const HandleGraph source,
MutableHandleGraph subgraph,
const uint64_t &  node_count,
bool  forward_only 
)

expand the subgraph iteratively until its node count is at least node_count

◆ extend()

void vg::algorithms::extend ( const HandleGraph source,
MutableHandleGraph into 
)

Adds the non-duplicative nodes and edges from 'source' to 'into'. Assumes that both graphs use the same ID space.

◆ extract_connecting_graph()

unordered_map< id_t, id_t > vg::algorithms::extract_connecting_graph ( const HandleGraph source,
DeletableHandleGraph into,
int64_t  max_len,
pos_t  pos_1,
pos_t  pos_2,
bool  detect_terminal_cycles = false,
bool  only_walks = false,
bool  strict_max_len = false 
)

Fills a DeletableHandleGraph with the subgraph of a HandleGraph that connects two positions. The nodes that contain the two positions will be 'cut' at the position and will be tips in the returned graph. By default, the algorithm provides only one guarantee:

  • 'into' contains all walks between pos_1 and pos_2 under the maximum length except walks that include a cycle involving either position Cutting the nodes containing the two positions breaks cycles containing those nodes, so these nodes may optinally be duplicated so that cycles involving the two positions are maintained. No other nodes will be duplicated. The algorithm optionally provides additional guarantees at the expense of increased computational cost, but no increase in asymptotic complexity (the guarantees are described below). If no walk between the two positions under the maximum length exists, 'into' will be left empty. An error is thrown if 'into' is not empty when passed to function.

Args: source graph to extract subgraph from into graph to extract into max_len guarantee finding walks along which pos_1 and pos_2 are this distance apart pos_1 start position, subgraph walks begin from here in same orientation pos_2 end position, subgraph walks end here in the same orientation detect_terminal_cycles also find walks that include cycles involving pos_1 and/or pos_2 only_walks only extract nodes and edges if they fall on some walk between pos_1 and pos_2 strict_max_len only extract nodes and edges if they fall on some walk between pos_1 and pos_2 that is under the maximum length (implies only_walks = true)

◆ extract_containing_graph() [1/3]

void vg::algorithms::extract_containing_graph ( const HandleGraph source,
MutableHandleGraph into,
const vector< pos_t > &  positions,
const vector< size_t > &  position_forward_max_dist,
const vector< size_t > &  position_backward_max_dist,
size_t  reversing_walk_length = 0 
)

Same semantics as previous except that there is a separate maximum distance for different positions in the graph and for each search direction. Each distance is associated with the position with the same index. The forward distance is in the same orientation as the position, and the backward distance is in the reverse orientation of the position. Throws an error if the position and distance vectors are not the same length.

◆ extract_containing_graph() [2/3]

void vg::algorithms::extract_containing_graph ( const HandleGraph source,
MutableHandleGraph into,
const vector< pos_t > &  positions,
const vector< size_t > &  position_max_dist,
size_t  reversing_walk_length = 0 
)

Same semantics as previous except that there is a separate maximum distance for different positions in the graph. Each distance is associated with the position with the same index. Throws an error if the position and distance vectors are not the same length.

◆ extract_containing_graph() [3/3]

void vg::algorithms::extract_containing_graph ( const HandleGraph source,
MutableHandleGraph into,
const vector< pos_t > &  positions,
size_t  max_dist,
size_t  reversing_walk_length = 0 
)

Fills graph 'into' with the subgraph of the handle graph 'source' that contains all of the positions in the positions vector and all other nodes and edges that can be reached within a maximum distance from any of these positions. Optionally also finds nodes and edges that can be reached within some distance from the previously mentioned nodes, except along non- proper bidirected walks. Node IDs in the subgraph are retained from the source graph.

Args: source graph to extract subgraph from into graph to extract into positions search outward from these positions max_dist include all nodes and edges that can be reached in at most this distance reversing_walk_length also find graph material that can be reached

◆ extract_context()

void vg::algorithms::extract_context ( const HandleGraph source,
MutableHandleGraph subgraph,
const handle_t handle,
const uint64_t &  offset,
const uint64_t &  length,
bool  fwd,
bool  rev 
)

expand the context around a single handle position

◆ extract_extending_graph()

unordered_map< id_t, id_t > vg::algorithms::extract_extending_graph ( const HandleGraph source,
DeletableHandleGraph into,
int64_t  max_dist,
pos_t  pos,
bool  backward,
bool  preserve_cycles_on_src_node 
)

Fills graph 'into' with the subgraph of the handle graph 'source' that extends in one direction from a given position, up to a maximum distance. The node containing the position will be "cut" so that only the portion that is forward in the search direction remains. Node IDs may be changed in the extracted graph, but they can be translated back to node IDs in the original graph with the returned map, although that translation procedure MUST handle the node that pos is on specially, as it may be cut. translate_node_ids from path.hpp can do this as long as you pass along what part of the node was removed. The node containing the source position may optionally be duplicated to preserve cycles on it after its cut, but no other nodes will will duplicated.

Args: source graph to extract subgraph from into graph to extract into max_dist include all nodes and edges that can be reached in at most this distance pos extend from this position backward extend in this direction preserve_cycles_on_src if necessary, duplicate starting node to preserve cycles after cutting it

◆ extract_id_range()

void vg::algorithms::extract_id_range ( const HandleGraph source,
const nid_t id1,
const nid_t id2,
MutableHandleGraph subgraph 
)

extract the node id range

◆ extract_path_range()

void vg::algorithms::extract_path_range ( const PathPositionHandleGraph source,
path_handle_t  path_handle,
int64_t  start,
int64_t  end,
MutableHandleGraph subgraph 
)

extract the path range nodes aren't cut, so the returned graph may start before start and/or end after end if end < 0, then it will walk to the end of the path

◆ find_shortest_paths()

unordered_map< handle_t, size_t > vg::algorithms::find_shortest_paths ( const HandleGraph g,
handle_t  start,
bool  traverse_leftward = false 
)

Finds the length of the shortest oriented path from the given handle in a given direction to all reachable oriented nodes on a directed walk. Uses Dijkstra's Algorithm. Distances are measured between the outgoing side of the start node and the incoming side of the target.

◆ find_tips()

vector< handle_t > vg::algorithms::find_tips ( const HandleGraph g)

Find all of the tips in the graph, facing inward. Nodes with no edges will appear once in each orientation.

◆ for_each_kmer()

void vg::algorithms::for_each_kmer ( const HandleGraph graph,
size_t  k,
size_t  edge_max,
const std::function< void(const kmer_t &)> &  lambda 
)

Iterate over all the kmers in the graph, running lambda on each.

◆ for_each_walk()

void vg::algorithms::for_each_walk ( const HandleGraph graph,
size_t  k,
size_t  edge_max,
const std::function< void(const walk_t &)> &  lambda 
)

Iterate over all the walks in the graph, running lambda on each.

◆ get_depth_from_index()

pair< float, float > vg::algorithms::get_depth_from_index ( const BinnedDepthIndex depth_index,
const string &  path_name,
size_t  start_offset,
size_t  end_offset 
)

Query index created above.

◆ gfa_to_handle_graph()

void vg::algorithms::gfa_to_handle_graph ( const string &  filename,
MutableHandleGraph graph,
bool  try_from_disk = true,
bool  try_id_increment_hint = false 
)

Read a GFA file for a blunt-ended graph into a HandleGraph. Give "-" as a filename for stdin.

Optionally tries read the GFA from disk without creating an in-memory representation (defaults to in-memory algorithm if reading from stdin).

Also optionally provides a hint about the node ID range to the handle graph implementation before constructing it (defaults to no hint if reading from stdin).

Throws GFAFormatError if the GFA file is not acceptable, and std::ios_base::failure if an IO operation fails. Throws invalid_argument if otherwise misused.

◆ gfa_to_handle_graph_add_paths()

void vg::algorithms::gfa_to_handle_graph_add_paths ( const string &  filename,
istream *  unseekable,
MutablePathHandleGraph graph,
gfak::GFAKluge &  gg 
)

After the given GFAKluge has been populated with nodes and edges, load path information. If the input is a seekable file, filename will be filled in and unseekable will be nullptr. If the input is not a seekable file, filename may be filled in, and unseekable will be set to a stream to read from.

◆ gfa_to_handle_graph_in_memory()

void vg::algorithms::gfa_to_handle_graph_in_memory ( istream &  in,
MutableHandleGraph graph,
gfak::GFAKluge &  gg 
)

◆ gfa_to_handle_graph_load_graph()

void vg::algorithms::gfa_to_handle_graph_load_graph ( const string &  filename,
istream *  unseekable,
MutableHandleGraph graph,
bool  try_id_increment_hint,
gfak::GFAKluge &  gg 
)

Parse nodes and edges and load them into the given GFAKluge. If the input is a seekable file, filename will be filled in and unseekable will be nullptr. If the input is not a seekable file, filename may be filled in, and unseekable will be set to a stream to read from.

◆ gfa_to_handle_graph_on_disk()

void vg::algorithms::gfa_to_handle_graph_on_disk ( const string &  filename,
MutableHandleGraph graph,
bool  try_id_increment_hint,
gfak::GFAKluge &  gg 
)

◆ gfa_to_path_handle_graph()

void vg::algorithms::gfa_to_path_handle_graph ( const string &  filename,
MutablePathMutableHandleGraph graph,
bool  try_from_disk,
bool  try_id_increment_hint 
)

Same as gfa_to_handle_graph but also adds path elements from the GFA to the graph.

◆ gfa_to_path_handle_graph_in_memory()

void vg::algorithms::gfa_to_path_handle_graph_in_memory ( istream &  in,
MutablePathMutableHandleGraph graph 
)

Same as above but operating on a stream. Assumed to be non-seekable; all conversion happens in memory. Always streaming. Doesn't support ID increment hints.

◆ head_nodes()

vector< handle_t > vg::algorithms::head_nodes ( const HandleGraph g)

Find all of the nodes with no edges on their left sides.

◆ id_order()

vector< handle_t > vg::algorithms::id_order ( const HandleGraph g)

Order all the handles in the graph in ID order. All orientations are forward.

◆ is_acyclic()

bool vg::algorithms::is_acyclic ( const HandleGraph graph)

◆ is_directed_acyclic()

bool vg::algorithms::is_directed_acyclic ( const HandleGraph graph)

Returns true if the graph contains no directed cycles. It may contain reversing cycles (i.e. true if no node can reach itself in the same orientation along a bidirected walk, but it might be able to reach itself in the opposite orientation).

◆ is_head_node()

bool vg::algorithms::is_head_node ( handle_t  h,
const HandleGraph g 
)

◆ is_single_stranded()

bool vg::algorithms::is_single_stranded ( const HandleGraph graph)

Returns true if the graph contains no reversing edges (i.e. edges that connected the locally forward orientation of a node to the locally reverse orientation of of another node).

◆ is_tail_node()

bool vg::algorithms::is_tail_node ( handle_t  h,
const HandleGraph g 
)

◆ is_weakly_connected()

bool vg::algorithms::is_weakly_connected ( const HandleGraph graph)

Returns true if graph is a single weakly connected component. Graphs with no nodes are considered connected.

◆ jump_along_closest_path()

vector< pos_t > vg::algorithms::jump_along_closest_path ( const PathPositionHandleGraph graph,
const pos_t pos,
int64_t  jump_dist,
size_t  max_search_dist 
)

returns a vector of positions that are found by jumping a fixed oriented distance along path(s) from the given position. if the position is not on a path, searches from the position to a path and adds/subtracts the search distance to the jump depending on the search direction. returns an empty vector if there is no path within the max search distance or if the jump distance goes past the end of the path

◆ lazier_topological_order()

vector< handle_t > vg::algorithms::lazier_topological_order ( const HandleGraph g)

Order the nodes in a graph using a topological sort. Similar to lazy_topological_order but somewhat faster. The algorithm is invalid in a graph that has any cycles or any reversing edges. For safety, consider these properties with algorithms::is_acyclic() and algorithms::is_single_stranded().

◆ lazy_topological_order()

vector< handle_t > vg::algorithms::lazy_topological_order ( const HandleGraph g)

Order the nodes in a graph using a topological sort. The sort is NOT guaranteed to be machine-independent, but it is faster than topological_order(). This algorithm is invalid in a graph that has any cycles. For safety, consider this property with algorithms::is_directed_acyclic().

◆ lazy_topological_order_internal()

vector<handle_t> vg::algorithms::lazy_topological_order_internal ( const HandleGraph g,
bool  lazier 
)

◆ make_single_stranded()

unordered_set< id_t > vg::algorithms::make_single_stranded ( MutableHandleGraph graph)

Finds a set of node orientations that can be applied so that there are no reversing edges (i.e. every edge connects a locally forward node traversal to another locally forward orientation). If no such combination of orientations exists, produces an error and exits. Returns a set of the node IDs for nodes that were swapped in orientation. Potentially invalidates any existing handles.

◆ merge()

void vg::algorithms::merge ( handlegraph::MutablePathDeletableHandleGraph graph,
const vector< pair< handle_t, size_t >> &  start,
size_t  length 
)

Merge the given ranges of bases on the given handles together, rewriting paths. Sequences must match. Handles to a single node may occur no more than once.

◆ min_approx_path_distance()

size_t vg::algorithms::min_approx_path_distance ( const PathPositionHandleGraph graph,
const pos_t pos1,
const pos_t pos2,
uint64_t  max_search 
)

use the embedded paths to get an estimated minimum distance between the positions

◆ multipath_alignment_path_offsets()

unordered_map< path_handle_t, vector< pair< size_t, bool > > > vg::algorithms::multipath_alignment_path_offsets ( const PathPositionHandleGraph graph,
const multipath_alignment_t mp_aln 
)

Find the position of a multipath alignment on paths. Returns the lowest offset position on a path for each contiguous stretch of the multipath alignment, but multiple positions on the same path may be returned if the multipath alignment is disconnected or fans out toward the sources or sinks.

◆ nearest_offsets_in_paths()

unordered_map< path_handle_t, vector< pair< size_t, bool > > > vg::algorithms::nearest_offsets_in_paths ( const PathPositionHandleGraph graph,
const pos_t pos,
int64_t  max_search 
)

Return, for the nearest position in a path to the given position, subject to the given max search distance, a mapping from path name to all positions on each path where that pos_t occurs.

◆ next_pos_chars()

map< pos_t, char > vg::algorithms::next_pos_chars ( const PathPositionHandleGraph graph,
pos_t  pos 
)

◆ normalize()

void vg::algorithms::normalize ( handlegraph::MutablePathDeletableHandleGraph graph,
int  max_iter = 1,
bool  debug = false 
)

Normalize a graph, performing up to the given number of iterations. Simplifies siblings and unchops runs of nodes, in a loop.

◆ offsets_in_paths()

map< string, vector< pair< size_t, bool > > > vg::algorithms::offsets_in_paths ( const PathPositionHandleGraph graph,
const pos_t pos 
)

Wrapper for the above to support some earlier code. Only looks for paths that directly touch the position, and returns the paths by name.

◆ operator<<() [1/2]

std::ostream & vg::algorithms::operator<< ( std::ostream &  out,
const kmer_t kmer 
)

Print a kmer_t to a stream.

◆ operator<<() [2/2]

std::ostream & vg::algorithms::operator<< ( std::ostream &  out,
const walk_t walk 
)

Print a walk_t to a stream.

◆ orient_nodes_forward()

unordered_set< id_t > vg::algorithms::orient_nodes_forward ( MutableHandleGraph g)

Topologically sort the given handle graph, and then apply that sort to orient all the nodes in the global forward direction. May invalidate any paths stored by the graph. The re-orientation is guaranteed to be stable. Invalidates all handles into the graph (since any node might be flipped).

◆ packed_depth_of_bin()

pair< double, double > vg::algorithms::packed_depth_of_bin ( const Packer packer,
step_handle_t  start_step,
step_handle_t  end_plus_one_step,
size_t  min_coverage,
bool  include_deletions 
)

Estimate the coverage along a given reference path interval [start_step, end_plus_one_step) Coverage is obtained only from positions along the path, and variation is not counted Except if "include_deletions" is true, then reference path positions covered by a deletion edge (which is contained in the bin) will get the deletion edge's coverage counted. Other types of events (such as SNPs) can throw off coverage in similar ways but deletions tend to be bigger (and easier to find), so we hope that counting them is enough. If one wants to infer deletions from the coverage, obviously this should be false, but if looking for a background coverage for genotyping, then setting it to true may be helpful

◆ packed_depths()

void vg::algorithms::packed_depths ( const Packer packer,
const string &  path_name,
size_t  min_coverage,
ostream &  out_stream 
)

print path-name offset base-coverage for every base on a path (just like samtools depth) ignoring things below min_coverage. offsets are 1-based in output stream

◆ parse_gfa_sequence_id()

nid_t vg::algorithms::parse_gfa_sequence_id ( const string &  str)

◆ path_string()

std::string vg::algorithms::path_string ( const HandleGraph graph,
const Path path 
)

use the given graph and the path to determine our path string

◆ process_raw_gfa_path_name()

string vg::algorithms::process_raw_gfa_path_name ( const string &  path_name_raw)

◆ remove_high_degree_nodes()

void vg::algorithms::remove_high_degree_nodes ( DeletableHandleGraph g,
int  max_degree 
)

Remove nodes with >= max_degree total edges on each side. Note that end-to-start self loops count twice.

◆ reverse_complement_graph()

unordered_map< id_t, pair< id_t, bool > > vg::algorithms::reverse_complement_graph ( const HandleGraph source,
MutableHandleGraph into 
)

Fills a MutableHandleGraph 'into' with a graph that has the same sequence and path space as 'source', but the forward strand of every node is flipped to the reverse strand. Reports an error and exits if 'into' is not empty.

◆ sample_gam_depth()

pair<double, double> vg::algorithms::sample_gam_depth ( const HandleGraph graph,
const vector< Alignment > &  alignments,
size_t  max_nodes,
size_t  random_seed,
size_t  min_coverage,
size_t  min_mapq 
)

◆ sample_mapping_depth() [1/2]

pair< double, double > vg::algorithms::sample_mapping_depth ( const HandleGraph graph,
const string &  input_filename,
size_t  max_nodes,
size_t  random_seed,
size_t  min_coverage,
size_t  min_mapq,
const string &  format = "GAM" 
)

Return the mean and variance of coverage of randomly sampled nodes from a mappings file Nodes with less than min_coverage are ignored The input_filename can be - for stdin The stream is scanned in parallel with all threads max_nodes is used to keep memory down valid formats are "GAM" and "GAF"

◆ sample_mapping_depth() [2/2]

pair<double, double> vg::algorithms::sample_mapping_depth ( const HandleGraph graph,
const vector< Alignment > &  alignments,
size_t  max_nodes,
size_t  random_seed,
size_t  min_coverage,
size_t  min_mapq 
)

As above, but read a vector instead of a stream.

◆ shortest_cycle_length() [1/2]

size_t vg::algorithms::shortest_cycle_length ( const HandleGraph graph)

Returns the length of the shortest cycle in the entire graph, or numeric_limits<size_t>::max() if no cycle exists.

◆ shortest_cycle_length() [2/2]

size_t vg::algorithms::shortest_cycle_length ( const HandleGraph graph,
const handle_t source 
)

Returns the length of the shortest cycle containing the source node, or numeric_limits<size_t>::max() if no cycle exists.

◆ shortest_cycle_length_internal()

size_t vg::algorithms::shortest_cycle_length_internal ( const HandleGraph graph,
const handle_t source,
const vector< handle_t > &  layout,
const unordered_map< handle_t, size_t > &  handle_index,
const vector< pair< size_t, size_t >> &  feedback_edges 
)

◆ simple_offsets_in_paths()

unordered_map< path_handle_t, vector< pair< size_t, bool > > > vg::algorithms::simple_offsets_in_paths ( const PathPositionHandleGraph graph,
pos_t  pos 
)

A "simple" model for path position getting for debugging.

◆ simplify_siblings()

bool vg::algorithms::simplify_siblings ( handlegraph::MutablePathDeletableHandleGraph graph)

Simplify siblings in the given graph.

When one base has two successors with the same base value, and those successors have the same set of predecessors, the successors will be merged.

Performs only a subset of the possible merges. Can only merge in from one side of a given node in a single invocation. Returns true if it made progress and there may be more merging to do.

Preserves paths.

◆ single_stranded_orientation()

vector< handle_t > vg::algorithms::single_stranded_orientation ( const HandleGraph graph)

Returns a vector of handles where the orientation of each handle indicates an orientation that could be used to convert the graph into a single-stranded graph. That is, if all of the reverse handles (or all of the forward handles) were swapped in orientation, the graph would contain no reversing edges. Returns an empty vector if there is no such combination of node orientations (also if graph has no nodes).

◆ sorted_id_ranges()

vector< pair< id_t, id_t > > vg::algorithms::sorted_id_ranges ( const HandleGraph graph)

Get a sorted list of inclusive ranges of IDs used in the given HandleGraph.

◆ split_strands()

unordered_map< id_t, pair< id_t, bool > > vg::algorithms::split_strands ( const HandleGraph source,
MutableHandleGraph into 
)

Fills a MutableHandleGraph 'into' with a graph that has the same sequence and path space as 'source', but all of the sequences are on the forward strand. This is accomplished by creating a new node for each node in the source graph with the reverse complement sequence. Returns a map that translates node IDs from 'into' to their node ID and orientation in 'source'. Reports an error and exits if 'into' is not empty.

◆ strongly_connected_components()

vector< unordered_set< id_t > > vg::algorithms::strongly_connected_components ( const HandleGraph handle_graph)

Identify strongly connected components.

◆ tail_nodes()

vector< handle_t > vg::algorithms::tail_nodes ( const HandleGraph g)

Find all of the nodes with no edges on their right sides.

◆ three_edge_connected_component_merges()

template<typename TECCNode >
void vg::algorithms::three_edge_connected_component_merges ( const function< void(const function< void(TECCNode)> &)> &  for_each_node,
const function< void(TECCNode, const function< void(TECCNode)> &)> &  for_each_connected_node,
const function< void(TECCNode, TECCNode)> &  same_component 
)

Get the three-edge-connected components of an arbitrary graph (not necessarily a handle graph). Only recognizes one kind of edge and one kind of node. Nodes are arbitrary value types (which may need to be hashable).

Takes a function that loops an iteratee over all nodes, and a function that, given a node, loops an iteratee over all nodes connected to it.

Calls same_component with pairs of nodes in (at least) a spanning tree of the set of nodes in each component (not restricted to the input graph). Doing merge operations on a union-find can get you the set of components. The callback MUST NOT modify the graph!

If you have a graph where you can easily rank the nodes, don't use this. Use three_edge_connected_components_dense() instead. The first thing this function does is asign nodes a dense, 0-based rank space.

◆ three_edge_connected_component_merges_dense()

void vg::algorithms::three_edge_connected_component_merges_dense ( size_t  node_count,
size_t  first_root,
const function< void(size_t, const function< void(size_t)> &)> &  for_each_connected_node,
const function< void(size_t, size_t)> &  same_component 
)

Get the three-edge-connected components of an arbitrary graph (not necessarily a handle graph). Only recognizes one kind of edge and one kind of node. Nodes are dense positive integers starting with 0.

Takes a total node count, a suggested root (or 0), and a function that, given a node, loops an iteratee over all nodes connected to it.

Calls same_component with pairs of nodes in (at least) a spanning tree of the set of nodes in each component (not restricted to the input graph). Doing merge operations on a union-find can get you the set of components. The callback MUST NOT modify the graph!

This defines the data we track for each node in the graph

When in the DFS were we first visited?

When in the DFS were we last visited? Needed for finding replacement neighbors to implement path range absorption in part 1.3, when we're asked for a range to a neighbor that got eaten.

What is our "low point" in the search. This is the earliest dfs_counter for a node that this node or any node in its DFS subtree has a back-edge to.

What is the effective degree of this node in the graph with all the absorb-eject modifications applied?

What node has the continuation of this node's path? If equal to numeric_limits<number_t>::max(), the path ends after here. The node's path is the path from this node, into its DFS subtree, to (one of) the nodes in the subtree that has the back-edge that caused this node's low point to be so low. Basically a low point traceback.

Is this node actually on its own path? Nodes can be removed from their paths if those nodes don't matter any more (i.e. got absorbed) but their paths still need to be tails for other paths.

Has the node been visited yet? Must be 0. TODO: Move to its own vector to make zeroing them all free-ish with page table shenanigans.

Track the node that this stack frame represents

Track all the neighbors left to visit. When we visit a neighbor we pop it off the back.

When we look at the neighbors, we need to be able to tell the tree edge to the parent from further back edges to the parent. So we have a flag for whether we have seen the parent tree edge already, and the first neighbors entry that is our parent will get called the tree edge.

Track whether we made a recursive DFS call into the last neighbor or not. If we did, we need to do some work when we come out of it and return to this frame.

◆ three_edge_connected_components()

template<typename TECCNode >
void vg::algorithms::three_edge_connected_components ( const function< void(const function< void(TECCNode)> &)> &  for_each_node,
const function< void(TECCNode, const function< void(TECCNode)> &)> &  for_each_connected_node,
const function< void(const function< void(const function< void(TECCNode)> &)> &)> &  component_callback 
)

Get the three-edge-connected components of an arbitrary graph (not necessarily a handle graph). Only recognizes one kind of edge and one kind of node. Nodes are arbitrary value types (which may need to be hashable).

Takes a function that loops an iteratee over all nodes, and a function that, given a node, loops an iteratee over all nodes connected to it.

For each component identified, calls the given callback with a function that iterates over all nodes in the component.

If you have a graph where you can easily rank the nodes, don't use this. Use three_edge_connected_components_dense() instead. The first thing this function does is asign nodes a dense, 0-based rank space.

◆ three_edge_connected_components_dense()

void vg::algorithms::three_edge_connected_components_dense ( size_t  node_count,
size_t  first_root,
const function< void(size_t, const function< void(size_t)> &)> &  for_each_connected_node,
const function< void(const function< void(const function< void(size_t)> &)> &)> &  component_callback 
)

Get the three-edge-connected components of an arbitrary graph (not necessarily a handle graph). Only recognizes one kind of edge and one kind of node. Nodes are dense positive integers starting with 0.

Takes a total node count, a suggested root (or 0), and a function that, given a node, loops an iteratee over all nodes connected to it.

For each component identified, calls the given callback with a function that iterates over all nodes in the component.

◆ three_edge_connected_components_dense_cactus()

void vg::algorithms::three_edge_connected_components_dense_cactus ( size_t  node_count,
const function< void(size_t, const function< void(size_t)> &)> &  for_each_connected_node,
const function< void(const function< void(const function< void(size_t)> &)> &)> &  component_callback 
)

Get the three-edge-connected components of an arbitrary graph (not necessarily a handle graph). Only recognizes one kind of edge and one kind of node. Nodes are dense positive integers starting with 0.

Wraps the known good the 3 edge connected components algorithm from the pinchesAndCacti library.

Takes a total node count, and a function that, given a node, loops an iteratee over all nodes connected to it.

For each component identified, calls the given callback with a function that iterates over all nodes in the component.

◆ to_gfa()

void vg::algorithms::to_gfa ( const PathHandleGraph graph,
ostream &  out 
)

◆ topological_order()

vector< handle_t > vg::algorithms::topological_order ( const HandleGraph g)

Order and orient the nodes in the graph using a topological sort. The sort is guaranteed to be machine-independent given the initial graph's node and edge ordering. The algorithm is well-defined on non-DAG graphs, but the order is necessarily not a topological order.

We use a bidirected adaptation of Kahn's topological sort (1962), which can handle components with no heads or tails.

L ← Empty list that will contain the sorted and oriented elements S ← Set of nodes which have been oriented, but which have not had their downstream edges examined N ← Set of all nodes that have not yet been put into S

while N is nonempty do remove a node from N, orient it arbitrarily, and add it to S (In practice, we use "seeds": the heads all in a batch at the start, and any nodes we have seen that had too many incoming edges) while S is non-empty do remove an oriented node n from S add n to tail of L for each node m with an edge e from n to m do remove edge e from the graph if m has no other edges to that side then orient m such that the side the edge comes to is first remove m from N insert m into S otherwise put an oriented m on the list of arbitrary places to start when S is empty (This helps start at natural entry points to cycles) return L (a topologically sorted order and orientation)

◆ unchop()

void vg::algorithms::unchop ( handlegraph::MutablePathDeletableHandleGraph graph)

Unchop by gluing abutting handles with just a single edge between them and compatible path steps together.

◆ validate_gfa_edge()

void vg::algorithms::validate_gfa_edge ( const gfak::edge_elem &  e)

◆ walk_haplotype_frequency()

uint64_t vg::algorithms::walk_haplotype_frequency ( const HandleGraph graph,
const gbwt::GBWT &  haplotypes,
const walk_t walk 
)

◆ weakly_connected_components()

vector< unordered_set< id_t > > vg::algorithms::weakly_connected_components ( const HandleGraph graph)

Returns sets of IDs defining components that are connected by any series of nodes and edges, even if it is not a valid bidirected walk. TODO: It might make sense to have a handle-returning version, but the consumers of weakly connected components right now want IDs, and membership in a weakly connected component is orientation-independent.

◆ weakly_connected_components_with_tips()

vector< pair< unordered_set< id_t >, vector< handle_t > > > vg::algorithms::weakly_connected_components_with_tips ( const HandleGraph graph)

Return pairs of weakly connected component ID sets and the handles that are their tips, oriented inward. If a node is both a head and a tail, it will appear in tips in both orientations.

◆ widest_dijkstra()

pair< double, vector< handle_t > > vg::algorithms::widest_dijkstra ( const HandleGraph g,
handle_t  source,
handle_t  sink,
function< double(const handle_t &)>  node_weight_callback,
function< double(const edge_t &)>  edge_weight_callback,
function< bool(const handle_t &)>  is_node_ignored_callback,
function< bool(const edge_t &)>  is_edge_ignored_callbback,
bool  greedy_avg = false 
)

This Dijkstra is the same underlying algorithm as the one in dijkstra.hpp but the interface is different enough that I opted to make it a seprate thing rather than add loads of optional arguments. The key differences are these generalizations: – looks for the "widest" path (maximum minimum weight) instead of shortest – counts node and edge weights (via callbakcs) – returns the path as well as the score – option for ignoring certain nodes and edges in search (required by Yen's algorithm) – greedy_avg option switches the algorithm to a heuristic (no optimal guarantee) search using the running averages support instead of min-flow support as objective function.

◆ yens_k_widest_paths()

vector< pair< double, vector< handle_t > > > vg::algorithms::yens_k_widest_paths ( const HandleGraph g,
handle_t  source,
handle_t  sink,
size_t  K,
function< double(const handle_t &)>  node_weight_callback,
function< double(const edge_t &)>  edge_weight_callback,
bool  greedy_avg 
)

Find the k widest paths.