vg
tools for working with variation graphs
|
Classes | |
struct | GFAFormatError |
This exception will be thrown if the GFA data is not acceptable. More... | |
struct | kmer_t |
Stores a kmer in the context of a graph. More... | |
struct | walk_t |
Stores a walk in the context of a graph. More... | |
Typedefs | |
using | BinnedDepthIndex = unordered_map< string, map< size_t, map< size_t, pair< float, float > >> > |
Functions | |
template<class DistHeuristic > | |
vector< handle_t > | a_star (const HandleGraph *graph, const pos_t &pos_1, const pos_t &pos_2, const DistHeuristic &dist_heuristic, bool find_min=true, int64_t extremal_distance=numeric_limits< int64_t >::max(), bool monotonic_heuristic=true) |
unordered_map< path_handle_t, vector< pair< size_t, bool > > > | alignment_path_offsets (const PathPositionHandleGraph &graph, const Alignment &aln, bool just_min, bool nearby, size_t search_limit) |
unordered_map< path_handle_t, vector< pair< size_t, bool > > > | multipath_alignment_path_offsets (const PathPositionHandleGraph &graph, const multipath_alignment_t &mp_aln) |
void | annotate_with_initial_path_positions (const PathPositionHandleGraph &graph, Alignment &aln, size_t search_limit) |
void | annotate_with_node_path_positions (const PathPositionHandleGraph &graph, Alignment &aln, size_t search_limit) |
void | annotate_with_path_positions (const PathPositionHandleGraph &graph, Alignment &aln, bool just_min, size_t search_limit) |
void | annotate_with_initial_path_positions (const PathPositionHandleGraph &graph, vector< Alignment > &alns, size_t search_limit) |
void | append_handle_graph (const HandleGraph *from, MutableHandleGraph *into) |
Append the heads of from to the tails of into. More... | |
void | append_path_handle_graph (const PathHandleGraph *from, MutablePathMutableHandleGraph *into, bool only_connect_path_tips) |
unordered_set< id_t > | apply_orientations (MutableHandleGraph *graph, const vector< handle_t > &orientations) |
size_t | min_approx_path_distance (const PathPositionHandleGraph *graph, const pos_t &pos1, const pos_t &pos2, uint64_t max_search) |
use the embedded paths to get an estimated minimum distance between the positions More... | |
bool | are_equivalent (const HandleGraph *graph_1, const HandleGraph *graph_2, bool verbose) |
bool | are_equivalent_with_paths (const PathHandleGraph *graph_1, const PathHandleGraph *graph_2, bool verbose) |
void | copy_handle_graph (const HandleGraph *from, MutableHandleGraph *into) |
Copies the nodes and edges from one graph into another. More... | |
void | copy_path_handle_graph (const PathHandleGraph *from, MutablePathMutableHandleGraph *into) |
Copies the nodes, edges, and paths from one graph into another. More... | |
void | copy_path (const PathHandleGraph *from, const path_handle_t &path, MutablePathHandleGraph *into) |
tuple< vector< handle_t >, unordered_map< handle_t, size_t >, bool > | count_walks_through_nodes (const HandleGraph *graph) |
size_t | count_walks (const HandleGraph *graph) |
void | packed_depths (const Packer &packer, const string &path_name, size_t min_coverage, ostream &out_stream) |
pair< double, double > | packed_depth_of_bin (const Packer &packer, step_handle_t start_step, step_handle_t end_plus_one_step, size_t min_coverage, bool include_deletions) |
vector< tuple< size_t, size_t, double, double > > | binned_packed_depth (const Packer &packer, const string &path_name, size_t bin_size, size_t min_coverage, bool include_deletions) |
BinnedDepthIndex | binned_packed_depth_index (const Packer &packer, const vector< string > &path_names, size_t min_bin_size, size_t max_bin_size, double exp_growth_factor, size_t min_coverage, bool include_deletions, bool std_err) |
pair< float, float > | get_depth_from_index (const BinnedDepthIndex &depth_index, const string &path_name, size_t start_offset, size_t end_offset) |
Query index created above. More... | |
pair< double, double > | sample_mapping_depth (const HandleGraph &graph, const string &input_filename, size_t max_nodes, size_t random_seed, size_t min_coverage, size_t min_mapq, const string &format) |
pair< double, double > | sample_gam_depth (const HandleGraph &graph, const vector< Alignment > &alignments, size_t max_nodes, size_t random_seed, size_t min_coverage, size_t min_mapq) |
pair< double, double > | sample_mapping_depth (const HandleGraph &graph, const vector< Alignment > &alignments, size_t max_nodes, size_t random_seed, size_t min_coverage, size_t min_mapq) |
As above, but read a vector instead of a stream. More... | |
unordered_map< id_t, id_t > | dagify (const HandleGraph *graph, MutableHandleGraph *into, size_t min_preserved_path_length) |
void | dfs (const HandleGraph &graph, const function< void(const handle_t &)> &handle_begin_fn, const function< void(const handle_t &)> &handle_end_fn, const function< bool(void)> &break_fn, const function< void(const edge_t &)> &edge_fn, const function< void(const edge_t &)> &tree_fn, const function< void(const edge_t &)> &edge_curr_fn, const function< void(const edge_t &)> &edge_cross_fn, const vector< handle_t > &sources, const unordered_set< handle_t > &sinks) |
void | dfs (const HandleGraph &graph, const function< void(const handle_t &)> &handle_begin_fn, const function< void(const handle_t &)> &handle_end_fn, const vector< handle_t > &sources, const unordered_set< handle_t > &sinks) |
void | dfs (const HandleGraph &graph, const function< void(const handle_t &)> &handle_begin_fn, const function< void(const handle_t &)> &handle_end_fn, const function< bool(void)> &break_fn) |
bool | dijkstra (const HandleGraph *g, handle_t start, function< bool(const handle_t &, size_t)> reached_callback, bool traverse_leftward) |
bool | dijkstra (const HandleGraph *g, const unordered_set< handle_t > &starts, function< bool(const handle_t &, size_t)> reached_callback, bool traverse_leftward) |
bool | is_head_node (handle_t h, const HandleGraph *g) |
int32_t | distance_to_head (handle_t h, int32_t limit, const HandleGraph *graph) |
int32_t | distance_to_head (handle_t h, int32_t limit, int32_t dist, unordered_set< handle_t > &seen, const HandleGraph *graph) |
bool | is_tail_node (handle_t h, const HandleGraph *g) |
int32_t | distance_to_tail (handle_t h, int32_t limit, const HandleGraph *graph) |
int32_t | distance_to_tail (handle_t h, int32_t limit, int32_t dist, unordered_set< handle_t > &seen, const HandleGraph *graph) |
vector< handle_t > | eades_algorithm (const HandleGraph *graph) |
void | expand_context_by_steps (const HandleGraph *source, MutableHandleGraph *subgraph, int64_t dist, bool expand_forward, bool expand_backward) |
void | expand_context_by_length (const HandleGraph *source, MutableHandleGraph *subgraph, int64_t dist, bool expand_forward, bool expand_backward) |
void | expand_context (const HandleGraph *source, MutableHandleGraph *subgraph, int64_t dist, bool use_steps, bool expand_forward, bool expand_backward) |
void | expand_context_with_paths (const PathHandleGraph *source, MutablePathMutableHandleGraph *subgraph, int64_t dist, bool use_steps, bool expand_forward, bool expand_backward) |
void | extend (const HandleGraph *source, MutableHandleGraph *into) |
unordered_map< id_t, id_t > | extract_connecting_graph (const HandleGraph *source, DeletableHandleGraph *into, int64_t max_len, pos_t pos_1, pos_t pos_2, bool detect_terminal_cycles, bool only_paths, bool strict_max_len) |
void | extract_containing_graph (const HandleGraph *source, MutableHandleGraph *into, const vector< pos_t > &positions, const vector< size_t > &forward_search_lengths, const vector< size_t > &backward_search_lengths, size_t reversing_walk_length) |
void | extract_containing_graph (const HandleGraph *source, MutableHandleGraph *into, const vector< pos_t > &positions, size_t max_dist, size_t reversing_walk_length) |
void | extract_containing_graph (const HandleGraph *source, MutableHandleGraph *into, const vector< pos_t > &positions, const vector< size_t > &position_max_dist, size_t reversing_walk_length) |
unordered_map< id_t, id_t > | extract_extending_graph (const HandleGraph *source, DeletableHandleGraph *into, int64_t max_dist, pos_t pos, bool backward, bool preserve_cycles_on_src_node) |
unordered_map< handle_t, size_t > | find_shortest_paths (const HandleGraph *g, handle_t start, bool traverse_leftward) |
vector< handle_t > | head_nodes (const HandleGraph *g) |
Find all of the nodes with no edges on their left sides. More... | |
vector< handle_t > | tail_nodes (const HandleGraph *g) |
Find all of the nodes with no edges on their right sides. More... | |
vector< handle_t > | find_tips (const HandleGraph *g) |
nid_t | parse_gfa_sequence_id (const string &str) |
void | validate_gfa_edge (const gfak::edge_elem &e) |
string | process_raw_gfa_path_name (const string &path_name_raw) |
void | gfa_to_handle_graph_in_memory (istream &in, MutableHandleGraph *graph, gfak::GFAKluge &gg) |
void | gfa_to_handle_graph_on_disk (const string &filename, MutableHandleGraph *graph, bool try_id_increment_hint, gfak::GFAKluge &gg) |
void | gfa_to_handle_graph_load_graph (const string &filename, istream *unseekable, MutableHandleGraph *graph, bool try_id_increment_hint, gfak::GFAKluge &gg) |
void | gfa_to_handle_graph_add_paths (const string &filename, istream *unseekable, MutablePathHandleGraph *graph, gfak::GFAKluge &gg) |
void | gfa_to_handle_graph (const string &filename, MutableHandleGraph *graph, bool try_from_disk, bool try_id_increment_hint) |
void | gfa_to_path_handle_graph (const string &filename, MutablePathMutableHandleGraph *graph, bool try_from_disk=true, bool try_id_increment_hint=false) |
Same as gfa_to_handle_graph but also adds path elements from the GFA to the graph. More... | |
void | gfa_to_path_handle_graph_in_memory (istream &in, MutablePathMutableHandleGraph *graph) |
vector< handle_t > | id_order (const HandleGraph *g) |
bool | is_acyclic (const HandleGraph *graph) |
bool | is_directed_acyclic (const HandleGraph *graph) |
bool | is_single_stranded (const HandleGraph *graph) |
vector< handle_t > | single_stranded_orientation (const HandleGraph *graph) |
unordered_set< id_t > | make_single_stranded (MutableHandleGraph *graph) |
vector< pos_t > | jump_along_closest_path (const PathPositionHandleGraph *graph, const pos_t &pos, int64_t jump_dist, size_t max_search_dist) |
pair< double, vector< handle_t > > | widest_dijkstra (const HandleGraph *g, handle_t source, handle_t sink, function< double(const handle_t &)> node_weight_callback, function< double(const edge_t &)> edge_weight_callback, function< bool(const handle_t &)> is_node_ignored_callback, function< bool(const edge_t &)> is_edge_ignored_callback, bool greedy_avg) |
vector< pair< double, vector< handle_t > > > | yens_k_widest_paths (const HandleGraph *g, handle_t source, handle_t sink, size_t K, function< double(const handle_t &)> node_weight_callback, function< double(const edge_t &)> edge_weight_callback, bool greedy_avg=false) |
Find the k widest paths. More... | |
void | for_each_kmer (const HandleGraph &graph, size_t k, size_t edge_max, const std::function< void(const kmer_t &)> &lambda) |
Iterate over all the kmers in the graph, running lambda on each. More... | |
std::ostream & | operator<< (std::ostream &out, const kmer_t &kmer) |
Print a kmer_t to a stream. More... | |
void | merge (handlegraph::MutablePathDeletableHandleGraph *graph, const vector< pair< handle_t, size_t >> &start, size_t length) |
unordered_map< path_handle_t, vector< pair< size_t, bool > > > | nearest_offsets_in_paths (const PathPositionHandleGraph *graph, const pos_t &pos, int64_t max_search) |
map< string, vector< pair< size_t, bool > > > | offsets_in_paths (const PathPositionHandleGraph *graph, const pos_t &pos) |
unordered_map< path_handle_t, vector< pair< size_t, bool > > > | simple_offsets_in_paths (const PathPositionHandleGraph *graph, pos_t pos) |
A "simple" model for path position getting for debugging. More... | |
map< pos_t, char > | next_pos_chars (const PathPositionHandleGraph &graph, pos_t pos) |
void | normalize (handlegraph::MutablePathDeletableHandleGraph *graph, int max_iter, bool debug) |
void | append_mapping_sequence (const Mapping &m, const string &node_seq, string &seq) |
use the given oriented node sequence and the mapping to reconstruct the sequence represented by the mapping More... | |
string | path_string (const HandleGraph &graph, const Path &path) |
use the given graph and the path to determine our path string More... | |
void | remove_high_degree_nodes (DeletableHandleGraph &g, int max_degree) |
unordered_map< id_t, pair< id_t, bool > > | reverse_complement_graph (const HandleGraph *source, MutableHandleGraph *into) |
size_t | bellman_ford_shortest_cycle_length (const HandleGraph *graph, const handle_t &source, const vector< handle_t > &layout, const unordered_map< handle_t, size_t > &handle_index, const vector< pair< size_t, size_t >> &feedback_edges) |
size_t | dijkstra_shortest_cycle_length (const HandleGraph *graph, const handle_t &source) |
Simple Dijkstra implementation that computes shortest cycle. More... | |
size_t | shortest_cycle_length_internal (const HandleGraph *graph, const handle_t &source, const vector< handle_t > &layout, const unordered_map< handle_t, size_t > &handle_index, const vector< pair< size_t, size_t >> &feedback_edges) |
size_t | shortest_cycle_length (const HandleGraph *graph, const handle_t &source) |
size_t | shortest_cycle_length (const HandleGraph *graph) |
bool | simplify_siblings (handlegraph::MutablePathDeletableHandleGraph *graph) |
vector< pair< id_t, id_t > > | sorted_id_ranges (const HandleGraph *graph) |
Get a sorted list of inclusive ranges of IDs used in the given HandleGraph. More... | |
unordered_map< id_t, pair< id_t, bool > > | split_strands (const HandleGraph *source, MutableHandleGraph *into) |
vector< unordered_set< id_t > > | strongly_connected_components (const HandleGraph *g) |
Identify strongly connected components. More... | |
void | expand_subgraph_by_steps (const HandleGraph &source, MutableHandleGraph &subgraph, const uint64_t &steps, bool forward_only=false) |
expand the subgraph iteratively for this many steps More... | |
void | expand_subgraph_to_node_count (const HandleGraph &source, MutableHandleGraph &subgraph, const uint64_t &node_count, bool forward_only=false) |
expand the subgraph iteratively until its node count is at least node_count More... | |
void | expand_subgraph_by_length (const HandleGraph &source, MutableHandleGraph &subgraph, const uint64_t &length, bool forward_only=false) |
expand the subgraph iteratively to include at least length new sequence More... | |
void | expand_subgraph_to_length (const HandleGraph &source, MutableHandleGraph &subgraph, const uint64_t &length, bool forward_only=false) |
expand the subgraph iterativel until its total sequence length is greater than length More... | |
void | extract_context (const HandleGraph &source, MutableHandleGraph &subgraph, const handle_t &handle, const uint64_t &offset, const uint64_t &length, bool fwd, bool rev) |
expand the context around a single handle position More... | |
void | extract_id_range (const HandleGraph &source, const nid_t &id1, const nid_t &id2, MutableHandleGraph &subgraph) |
extract the node id range More... | |
void | extract_path_range (const PathPositionHandleGraph &source, path_handle_t path_handle, int64_t start, int64_t end, MutableHandleGraph &subgraph) |
void | add_subpaths_to_subgraph (const PathPositionHandleGraph &source, MutablePathHandleGraph &subgraph, bool subpath_naming) |
void | add_connecting_edges_to_subgraph (const HandleGraph &source, MutableHandleGraph &subgraph) |
void | three_edge_connected_component_merges_dense (size_t node_count, size_t first_root, const function< void(size_t, const function< void(size_t)> &)> &for_each_connected_node, const function< void(size_t, size_t)> &same_component) |
void | three_edge_connected_components_dense (size_t node_count, size_t first_root, const function< void(size_t, const function< void(size_t)> &)> &for_each_connected_node, const function< void(const function< void(const function< void(size_t)> &)> &)> &component_callback) |
void | three_edge_connected_components_dense_cactus (size_t node_count, const function< void(size_t, const function< void(size_t)> &)> &for_each_connected_node, const function< void(const function< void(const function< void(size_t)> &)> &)> &component_callback) |
template<typename TECCNode > | |
void | three_edge_connected_components (const function< void(const function< void(TECCNode)> &)> &for_each_node, const function< void(TECCNode, const function< void(TECCNode)> &)> &for_each_connected_node, const function< void(const function< void(const function< void(TECCNode)> &)> &)> &component_callback) |
template<typename TECCNode > | |
void | three_edge_connected_component_merges (const function< void(const function< void(TECCNode)> &)> &for_each_node, const function< void(TECCNode, const function< void(TECCNode)> &)> &for_each_connected_node, const function< void(TECCNode, TECCNode)> &same_component) |
void | to_gfa (const PathHandleGraph &graph, ostream &out) |
vector< handle_t > | topological_order (const HandleGraph *g) |
vector< handle_t > | lazy_topological_order_internal (const HandleGraph *g, bool lazier) |
vector< handle_t > | lazy_topological_order (const HandleGraph *g) |
vector< handle_t > | lazier_topological_order (const HandleGraph *g) |
unordered_set< id_t > | orient_nodes_forward (MutableHandleGraph *g) |
void | unchop (handlegraph::MutablePathDeletableHandleGraph *graph) |
void | chop (handlegraph::MutableHandleGraph *graph, size_t max_node_length) |
void | for_each_walk (const HandleGraph &graph, size_t k, size_t edge_max, const std::function< void(const walk_t &)> &lambda) |
Iterate over all the walks in the graph, running lambda on each. More... | |
std::ostream & | operator<< (std::ostream &out, const walk_t &walk) |
Print a walk_t to a stream. More... | |
uint64_t | walk_haplotype_frequency (const HandleGraph &graph, const gbwt::GBWT &haplotypes, const walk_t &walk) |
vector< unordered_set< id_t > > | weakly_connected_components (const HandleGraph *graph) |
vector< pair< unordered_set< id_t >, vector< handle_t > > > | weakly_connected_components_with_tips (const HandleGraph *graph) |
bool | is_weakly_connected (const HandleGraph *graph) |
using vg::algorithms::BinnedDepthIndex = typedef unordered_map<string, map<size_t, map<size_t, pair<float, float> >> > |
Use the above function to retrieve the binned depths of a list of paths, and store them indexed by start coordinate. If std_err is true, store <mean, stderr> instead of <mean, variance> For each path, a series of indexes is computed, for bin sizes from min_bin_size, min_bin_size^(exp_growth_factor), etc.
vector< handle_t > vg::algorithms::a_star | ( | const HandleGraph * | graph, |
const pos_t & | pos_1, | ||
const pos_t & | pos_2, | ||
const DistHeuristic & | dist_heuristic, | ||
bool | find_min = true , |
||
int64_t | extremal_distance = numeric_limits<int64_t>::max() , |
||
bool | monotonic_heuristic = true |
||
) |
Implements the A* heuristic-guided search algorithm. Returns the path from pos_1 to pos_2 that is either minimal or maximal length, according to the parameters. Allows an extremal distance beyond which the algorithm will cease looking for paths (this should be a large value when looking for minimal paths and a small value when looking for maximum paths). If there is no path between the positions, or none within the extremal length, an empty vector will be returned.
void vg::algorithms::add_connecting_edges_to_subgraph | ( | const HandleGraph & | source, |
MutableHandleGraph & | subgraph | ||
) |
We can accumulate a subgraph without accumulating all the edges between its nodes this helper ensures that we get the full set
void vg::algorithms::add_subpaths_to_subgraph | ( | const PathPositionHandleGraph & | source, |
MutablePathHandleGraph & | subgraph, | ||
bool | subpath_naming | ||
) |
add subpaths to the subgraph, providing a concatenation of subpaths that are discontiguous over the subgraph based on their order in the path position index provided by the source graph will clear any path found in both graphs before writing the new steps into it if subpath_naming is true, a suffix will be added to each path in the subgraph denoting its offset in the source graph (unless the subpath was not cut up at all)
unordered_map< path_handle_t, vector< pair< size_t, bool > > > vg::algorithms::alignment_path_offsets | ( | const PathPositionHandleGraph & | graph, |
const Alignment & | aln, | ||
bool | just_min, | ||
bool | nearby, | ||
size_t | search_limit = 0 |
||
) |
Gives the path positions of the alignment. If just_min is set, gives the minimum position on each path. Else, gives all Mapping start positions on each path. If nearby is set, will search for a nearby path. Will recurse with nearby set if it is not set on initial call and no positions are found. Respects search_limit in bp in that case. If search_limit is 0, read length is used.
void vg::algorithms::annotate_with_initial_path_positions | ( | const PathPositionHandleGraph & | graph, |
Alignment & | aln, | ||
size_t | search_limit = 0 |
||
) |
Use the graph to annotate an Alignment with the first position it touches on each reference path. Thread safe.
search_limit gives the maximum distance to search for a path if the alignment does not actually touch any paths. If 0, the alignment's sequence length is used.
void vg::algorithms::annotate_with_initial_path_positions | ( | const PathPositionHandleGraph & | graph, |
vector< Alignment > & | aln, | ||
size_t | search_limit = 0 |
||
) |
Use the graph annotate Alignments with the first position they touch on each reference path. Thread safe.
search_limit gives the maximum distance to search for a path if the alignment does not actually touch any paths. If 0, the alignment's sequence length is used.
void vg::algorithms::annotate_with_node_path_positions | ( | const PathPositionHandleGraph & | graph, |
Alignment & | aln, | ||
size_t | search_limit = 0 |
||
) |
Use the graph to annotate an Alignment with the first position it touches on each node it visits in each reference path. Thread safe. If no nodes on any path are visited, searches for a nearby path position to use.
search_limit gives the maximum distance to search for a path if the alignment does not actually touch any paths. If 0, the alignment's sequence length is used.
void vg::algorithms::annotate_with_path_positions | ( | const PathPositionHandleGraph & | graph, |
Alignment & | aln, | ||
bool | just_min, | ||
size_t | search_limit = 0 |
||
) |
Use the graph to annotate an Alignment with positions on each reference path. Thread safe.
If just_min is set, gives the minimum position on each path. Else, gives all Mapping start positions on each path. If no positions on the path are found, looks for nearby path positions in graph space. Respects search_limit in bp in that case. If search_limit is 0, read length is used.
void vg::algorithms::append_handle_graph | ( | const HandleGraph * | from, |
MutableHandleGraph * | into | ||
) |
Append the heads of from to the tails of into.
void vg::algorithms::append_mapping_sequence | ( | const Mapping & | m, |
const string & | node_seq, | ||
string & | seq | ||
) |
use the given oriented node sequence and the mapping to reconstruct the sequence represented by the mapping
void vg::algorithms::append_path_handle_graph | ( | const PathHandleGraph * | from, |
MutablePathMutableHandleGraph * | into, | ||
bool | only_connect_path_tips = false |
||
) |
Append the heads of from to the tails of into Append all (shared) paths of from to into, copy the rest. If only_connect_path_tips is true, then only edges linking appended paths will be added (as opposed to every head and tail)
unordered_set< id_t > vg::algorithms::apply_orientations | ( | MutableHandleGraph * | graph, |
const vector< handle_t > & | orientations | ||
) |
Modifies underlying graph so that any node whose handle is given in the reverse orientation is flipped so that all locally forward orientations match the orientation of the provided handles. Returns a set of IDs for nodes that were flipped. Invalid if vector contains multiple handles to the same node. May change the ordering of the underlying graph.
bool vg::algorithms::are_equivalent | ( | const HandleGraph * | graph_1, |
const HandleGraph * | graph_2, | ||
bool | verbose = false |
||
) |
Checks whether two graphs are identical in their IDs, sequences, and edges. Optionally reports why graphs are found non-equivalent to stderr.
bool vg::algorithms::are_equivalent_with_paths | ( | const PathHandleGraph * | graph_1, |
const PathHandleGraph * | graph_2, | ||
bool | verbose = false |
||
) |
Checks whether two graphs are identical in their IDs, sequences, edges, and paths. Optionally reports why graphs are found non-equivalent to stderr.
size_t vg::algorithms::bellman_ford_shortest_cycle_length | ( | const HandleGraph * | graph, |
const handle_t & | source, | ||
const vector< handle_t > & | layout, | ||
const unordered_map< handle_t, size_t > & | handle_index, | ||
const vector< pair< size_t, size_t >> & | feedback_edges | ||
) |
An implementation of Bellman-Ford with Yen's ordering improvement applied to a layout ideally has a small feedback arc set
vector< tuple< size_t, size_t, double, double > > vg::algorithms::binned_packed_depth | ( | const Packer & | packer, |
const string & | path_name, | ||
size_t | bin_size, | ||
size_t | min_coverage, | ||
bool | include_deletions | ||
) |
Use all available threads to estimate the binned packed coverage of a path using above fucntion Each element is a bin's 0-based open-ended interval in the path, and its coverage mean,variance.
BinnedDepthIndex vg::algorithms::binned_packed_depth_index | ( | const Packer & | packer, |
const vector< string > & | path_names, | ||
size_t | min_bin_size, | ||
size_t | max_bin_size, | ||
double | exp_growth_factor, | ||
size_t | min_coverage, | ||
bool | include_deletions, | ||
bool | std_err | ||
) |
void vg::algorithms::chop | ( | handlegraph::MutableHandleGraph * | graph, |
size_t | max_node_length | ||
) |
Chop the graph so nodes are at most max_node_length
void vg::algorithms::copy_handle_graph | ( | const HandleGraph * | from, |
MutableHandleGraph * | into | ||
) |
Copies the nodes and edges from one graph into another.
void vg::algorithms::copy_path | ( | const PathHandleGraph * | from, |
const path_handle_t & | path, | ||
MutablePathHandleGraph * | into | ||
) |
Copies a path from one graph to another. Nodes and edges to support the path must already exist.
void vg::algorithms::copy_path_handle_graph | ( | const PathHandleGraph * | from, |
MutablePathMutableHandleGraph * | into | ||
) |
Copies the nodes, edges, and paths from one graph into another.
size_t vg::algorithms::count_walks | ( | const HandleGraph * | graph | ) |
Returns the number of source-to-sink walks through the graph Returns numeric_limits<size_t>::max() if the actual number of walks is larger than this.
tuple< vector< handle_t >, unordered_map< handle_t, size_t >, bool > vg::algorithms::count_walks_through_nodes | ( | const HandleGraph * | graph | ) |
Returns the count map through each snarl in a graph. Assumes that the graph is a single-stranded DAG. Consider checking these properties with algorithms::is_single_stranded and algorithms::is_directed_acyclic for safety.
unordered_map< id_t, id_t > vg::algorithms::dagify | ( | const HandleGraph * | graph, |
MutableHandleGraph * | into, | ||
size_t | min_preserved_path_length | ||
) |
void vg::algorithms::dfs | ( | const HandleGraph & | graph, |
const function< void(const handle_t &)> & | handle_begin_fn, | ||
const function< void(const handle_t &)> & | handle_end_fn, | ||
const function< bool(void)> & | break_fn | ||
) |
void vg::algorithms::dfs | ( | const HandleGraph & | graph, |
const function< void(const handle_t &)> & | handle_begin_fn, | ||
const function< void(const handle_t &)> & | handle_end_fn, | ||
const function< bool(void)> & | break_fn, | ||
const function< void(const edge_t &)> & | edge_fn, | ||
const function< void(const edge_t &)> & | tree_fn, | ||
const function< void(const edge_t &)> & | edge_curr_fn, | ||
const function< void(const edge_t &)> & | edge_cross_fn, | ||
const vector< handle_t > & | sources, | ||
const unordered_set< handle_t > & | sinks | ||
) |
void vg::algorithms::dfs | ( | const HandleGraph & | graph, |
const function< void(const handle_t &)> & | handle_begin_fn, | ||
const function< void(const handle_t &)> & | handle_end_fn, | ||
const vector< handle_t > & | sources, | ||
const unordered_set< handle_t > & | sinks | ||
) |
bool vg::algorithms::dijkstra | ( | const HandleGraph * | g, |
const unordered_set< handle_t > & | starts, | ||
function< bool(const handle_t &, size_t)> | reached_callback, | ||
bool | traverse_leftward = false |
||
) |
Same as the single-start version, except allows starting from multiple handles, all at distance 0.
bool vg::algorithms::dijkstra | ( | const HandleGraph * | g, |
handle_t | start, | ||
function< bool(const handle_t &, size_t)> | reached_callback, | ||
bool | traverse_leftward = false |
||
) |
Walk out from the given handle and visit all reachable handles (including the start) in the graph once, in closest-first order, accounting for sequence lengths. Walks right unless traverse_leftward is specified, in which case it walks left. Distances are measured between the outgoing side of the start node and the incoming side of the target.
When the shortest distance to a handle has been determined, calls reached_callback with that handle and the distance to it. Calls to reached_callback will occur in ascending order of distance. The reached_callback function must return true to continue the search, and false to abort it early.
Returns true if the search terminated normally, and false if it was aborted.
size_t vg::algorithms::dijkstra_shortest_cycle_length | ( | const HandleGraph * | graph, |
const handle_t & | source | ||
) |
Simple Dijkstra implementation that computes shortest cycle.
int32_t vg::algorithms::distance_to_head | ( | handle_t | h, |
int32_t | limit, | ||
const HandleGraph * | graph | ||
) |
int32_t vg::algorithms::distance_to_head | ( | handle_t | h, |
int32_t | limit, | ||
int32_t | dist, | ||
unordered_set< handle_t > & | seen, | ||
const HandleGraph * | graph | ||
) |
Get the distance in bases from start of node to start of closest head node of graph, or -1 if that distance exceeds the limit. dist increases by the number of bases of each previous node until you reach the head node seen is a set that holds the nodes that you have already gotten the distance of, but starts off empty
int32_t vg::algorithms::distance_to_tail | ( | handle_t | h, |
int32_t | limit, | ||
const HandleGraph * | graph | ||
) |
int32_t vg::algorithms::distance_to_tail | ( | handle_t | h, |
int32_t | limit, | ||
int32_t | dist, | ||
unordered_set< handle_t > & | seen, | ||
const HandleGraph * | graph | ||
) |
Get the distance in bases from end of node to end of closest tail node of graph, or -1 if that distance exceeds the limit. dist increases by the number of bases of each previous node until you reach the head node seen is a set that holds the nodes that you have already gotten the distance of, but starts off empty
vector< handle_t > vg::algorithms::eades_algorithm | ( | const HandleGraph * | graph | ) |
Returns a layout of handles that has a small number of edges that point backward along the layout (i.e. feedback arcs). Only valid for graphs that have a single stranded orientation. Consider checking this property with algorithms::single_stranded_orientation.
void vg::algorithms::expand_context | ( | const HandleGraph * | source, |
MutableHandleGraph * | subgraph, | ||
int64_t | dist, | ||
bool | use_steps, | ||
bool | expand_forward, | ||
bool | expand_backward | ||
) |
void vg::algorithms::expand_context_by_length | ( | const HandleGraph * | source, |
MutableHandleGraph * | subgraph, | ||
int64_t | dist, | ||
bool | expand_forward, | ||
bool | expand_backward | ||
) |
void vg::algorithms::expand_context_by_steps | ( | const HandleGraph * | source, |
MutableHandleGraph * | subgraph, | ||
int64_t | dist, | ||
bool | expand_forward, | ||
bool | expand_backward | ||
) |
void vg::algorithms::expand_context_with_paths | ( | const PathHandleGraph * | source, |
MutablePathMutableHandleGraph * | subgraph, | ||
int64_t | dist, | ||
bool | use_steps, | ||
bool | expand_forward, | ||
bool | expand_backward | ||
) |
void vg::algorithms::expand_subgraph_by_length | ( | const HandleGraph & | source, |
MutableHandleGraph & | subgraph, | ||
const uint64_t & | length, | ||
bool | forward_only | ||
) |
expand the subgraph iteratively to include at least length new sequence
void vg::algorithms::expand_subgraph_by_steps | ( | const HandleGraph & | source, |
MutableHandleGraph & | subgraph, | ||
const uint64_t & | steps, | ||
bool | forward_only | ||
) |
expand the subgraph iteratively for this many steps
void vg::algorithms::expand_subgraph_to_length | ( | const HandleGraph & | source, |
MutableHandleGraph & | subgraph, | ||
const uint64_t & | length, | ||
bool | forward_only | ||
) |
expand the subgraph iterativel until its total sequence length is greater than length
void vg::algorithms::expand_subgraph_to_node_count | ( | const HandleGraph & | source, |
MutableHandleGraph & | subgraph, | ||
const uint64_t & | node_count, | ||
bool | forward_only | ||
) |
expand the subgraph iteratively until its node count is at least node_count
void vg::algorithms::extend | ( | const HandleGraph * | source, |
MutableHandleGraph * | into | ||
) |
Adds the non-duplicative nodes and edges from 'source' to 'into'. Assumes that both graphs use the same ID space.
unordered_map< id_t, id_t > vg::algorithms::extract_connecting_graph | ( | const HandleGraph * | source, |
DeletableHandleGraph * | into, | ||
int64_t | max_len, | ||
pos_t | pos_1, | ||
pos_t | pos_2, | ||
bool | detect_terminal_cycles = false , |
||
bool | only_walks = false , |
||
bool | strict_max_len = false |
||
) |
Fills a DeletableHandleGraph with the subgraph of a HandleGraph that connects two positions. The nodes that contain the two positions will be 'cut' at the position and will be tips in the returned graph. By default, the algorithm provides only one guarantee:
Args: source graph to extract subgraph from into graph to extract into max_len guarantee finding walks along which pos_1 and pos_2 are this distance apart pos_1 start position, subgraph walks begin from here in same orientation pos_2 end position, subgraph walks end here in the same orientation detect_terminal_cycles also find walks that include cycles involving pos_1 and/or pos_2 only_walks only extract nodes and edges if they fall on some walk between pos_1 and pos_2 strict_max_len only extract nodes and edges if they fall on some walk between pos_1 and pos_2 that is under the maximum length (implies only_walks = true)
void vg::algorithms::extract_containing_graph | ( | const HandleGraph * | source, |
MutableHandleGraph * | into, | ||
const vector< pos_t > & | positions, | ||
const vector< size_t > & | position_forward_max_dist, | ||
const vector< size_t > & | position_backward_max_dist, | ||
size_t | reversing_walk_length = 0 |
||
) |
Same semantics as previous except that there is a separate maximum distance for different positions in the graph and for each search direction. Each distance is associated with the position with the same index. The forward distance is in the same orientation as the position, and the backward distance is in the reverse orientation of the position. Throws an error if the position and distance vectors are not the same length.
void vg::algorithms::extract_containing_graph | ( | const HandleGraph * | source, |
MutableHandleGraph * | into, | ||
const vector< pos_t > & | positions, | ||
const vector< size_t > & | position_max_dist, | ||
size_t | reversing_walk_length = 0 |
||
) |
Same semantics as previous except that there is a separate maximum distance for different positions in the graph. Each distance is associated with the position with the same index. Throws an error if the position and distance vectors are not the same length.
void vg::algorithms::extract_containing_graph | ( | const HandleGraph * | source, |
MutableHandleGraph * | into, | ||
const vector< pos_t > & | positions, | ||
size_t | max_dist, | ||
size_t | reversing_walk_length = 0 |
||
) |
Fills graph 'into' with the subgraph of the handle graph 'source' that contains all of the positions in the positions vector and all other nodes and edges that can be reached within a maximum distance from any of these positions. Optionally also finds nodes and edges that can be reached within some distance from the previously mentioned nodes, except along non- proper bidirected walks. Node IDs in the subgraph are retained from the source graph.
Args: source graph to extract subgraph from into graph to extract into positions search outward from these positions max_dist include all nodes and edges that can be reached in at most this distance reversing_walk_length also find graph material that can be reached
void vg::algorithms::extract_context | ( | const HandleGraph & | source, |
MutableHandleGraph & | subgraph, | ||
const handle_t & | handle, | ||
const uint64_t & | offset, | ||
const uint64_t & | length, | ||
bool | fwd, | ||
bool | rev | ||
) |
expand the context around a single handle position
unordered_map< id_t, id_t > vg::algorithms::extract_extending_graph | ( | const HandleGraph * | source, |
DeletableHandleGraph * | into, | ||
int64_t | max_dist, | ||
pos_t | pos, | ||
bool | backward, | ||
bool | preserve_cycles_on_src_node | ||
) |
Fills graph 'into' with the subgraph of the handle graph 'source' that extends in one direction from a given position, up to a maximum distance. The node containing the position will be "cut" so that only the portion that is forward in the search direction remains. Node IDs may be changed in the extracted graph, but they can be translated back to node IDs in the original graph with the returned map, although that translation procedure MUST handle the node that pos is on specially, as it may be cut. translate_node_ids from path.hpp can do this as long as you pass along what part of the node was removed. The node containing the source position may optionally be duplicated to preserve cycles on it after its cut, but no other nodes will will duplicated.
Args: source graph to extract subgraph from into graph to extract into max_dist include all nodes and edges that can be reached in at most this distance pos extend from this position backward extend in this direction preserve_cycles_on_src if necessary, duplicate starting node to preserve cycles after cutting it
void vg::algorithms::extract_id_range | ( | const HandleGraph & | source, |
const nid_t & | id1, | ||
const nid_t & | id2, | ||
MutableHandleGraph & | subgraph | ||
) |
extract the node id range
void vg::algorithms::extract_path_range | ( | const PathPositionHandleGraph & | source, |
path_handle_t | path_handle, | ||
int64_t | start, | ||
int64_t | end, | ||
MutableHandleGraph & | subgraph | ||
) |
extract the path range nodes aren't cut, so the returned graph may start before start and/or end after end if end < 0, then it will walk to the end of the path
unordered_map< handle_t, size_t > vg::algorithms::find_shortest_paths | ( | const HandleGraph * | g, |
handle_t | start, | ||
bool | traverse_leftward = false |
||
) |
Finds the length of the shortest oriented path from the given handle in a given direction to all reachable oriented nodes on a directed walk. Uses Dijkstra's Algorithm. Distances are measured between the outgoing side of the start node and the incoming side of the target.
vector< handle_t > vg::algorithms::find_tips | ( | const HandleGraph * | g | ) |
Find all of the tips in the graph, facing inward. Nodes with no edges will appear once in each orientation.
void vg::algorithms::for_each_kmer | ( | const HandleGraph & | graph, |
size_t | k, | ||
size_t | edge_max, | ||
const std::function< void(const kmer_t &)> & | lambda | ||
) |
Iterate over all the kmers in the graph, running lambda on each.
void vg::algorithms::for_each_walk | ( | const HandleGraph & | graph, |
size_t | k, | ||
size_t | edge_max, | ||
const std::function< void(const walk_t &)> & | lambda | ||
) |
Iterate over all the walks in the graph, running lambda on each.
pair< float, float > vg::algorithms::get_depth_from_index | ( | const BinnedDepthIndex & | depth_index, |
const string & | path_name, | ||
size_t | start_offset, | ||
size_t | end_offset | ||
) |
Query index created above.
void vg::algorithms::gfa_to_handle_graph | ( | const string & | filename, |
MutableHandleGraph * | graph, | ||
bool | try_from_disk = true , |
||
bool | try_id_increment_hint = false |
||
) |
Read a GFA file for a blunt-ended graph into a HandleGraph. Give "-" as a filename for stdin.
Optionally tries read the GFA from disk without creating an in-memory representation (defaults to in-memory algorithm if reading from stdin).
Also optionally provides a hint about the node ID range to the handle graph implementation before constructing it (defaults to no hint if reading from stdin).
Throws GFAFormatError if the GFA file is not acceptable, and std::ios_base::failure if an IO operation fails. Throws invalid_argument if otherwise misused.
void vg::algorithms::gfa_to_handle_graph_add_paths | ( | const string & | filename, |
istream * | unseekable, | ||
MutablePathHandleGraph * | graph, | ||
gfak::GFAKluge & | gg | ||
) |
After the given GFAKluge has been populated with nodes and edges, load path information. If the input is a seekable file, filename will be filled in and unseekable will be nullptr. If the input is not a seekable file, filename may be filled in, and unseekable will be set to a stream to read from.
void vg::algorithms::gfa_to_handle_graph_in_memory | ( | istream & | in, |
MutableHandleGraph * | graph, | ||
gfak::GFAKluge & | gg | ||
) |
void vg::algorithms::gfa_to_handle_graph_load_graph | ( | const string & | filename, |
istream * | unseekable, | ||
MutableHandleGraph * | graph, | ||
bool | try_id_increment_hint, | ||
gfak::GFAKluge & | gg | ||
) |
Parse nodes and edges and load them into the given GFAKluge. If the input is a seekable file, filename will be filled in and unseekable will be nullptr. If the input is not a seekable file, filename may be filled in, and unseekable will be set to a stream to read from.
void vg::algorithms::gfa_to_handle_graph_on_disk | ( | const string & | filename, |
MutableHandleGraph * | graph, | ||
bool | try_id_increment_hint, | ||
gfak::GFAKluge & | gg | ||
) |
void vg::algorithms::gfa_to_path_handle_graph | ( | const string & | filename, |
MutablePathMutableHandleGraph * | graph, | ||
bool | try_from_disk, | ||
bool | try_id_increment_hint | ||
) |
Same as gfa_to_handle_graph but also adds path elements from the GFA to the graph.
void vg::algorithms::gfa_to_path_handle_graph_in_memory | ( | istream & | in, |
MutablePathMutableHandleGraph * | graph | ||
) |
Same as above but operating on a stream. Assumed to be non-seekable; all conversion happens in memory. Always streaming. Doesn't support ID increment hints.
vector< handle_t > vg::algorithms::head_nodes | ( | const HandleGraph * | g | ) |
Find all of the nodes with no edges on their left sides.
vector< handle_t > vg::algorithms::id_order | ( | const HandleGraph * | g | ) |
Order all the handles in the graph in ID order. All orientations are forward.
bool vg::algorithms::is_acyclic | ( | const HandleGraph * | graph | ) |
bool vg::algorithms::is_directed_acyclic | ( | const HandleGraph * | graph | ) |
Returns true if the graph contains no directed cycles. It may contain reversing cycles (i.e. true if no node can reach itself in the same orientation along a bidirected walk, but it might be able to reach itself in the opposite orientation).
bool vg::algorithms::is_head_node | ( | handle_t | h, |
const HandleGraph * | g | ||
) |
bool vg::algorithms::is_single_stranded | ( | const HandleGraph * | graph | ) |
Returns true if the graph contains no reversing edges (i.e. edges that connected the locally forward orientation of a node to the locally reverse orientation of of another node).
bool vg::algorithms::is_tail_node | ( | handle_t | h, |
const HandleGraph * | g | ||
) |
bool vg::algorithms::is_weakly_connected | ( | const HandleGraph * | graph | ) |
Returns true if graph is a single weakly connected component. Graphs with no nodes are considered connected.
vector< pos_t > vg::algorithms::jump_along_closest_path | ( | const PathPositionHandleGraph * | graph, |
const pos_t & | pos, | ||
int64_t | jump_dist, | ||
size_t | max_search_dist | ||
) |
returns a vector of positions that are found by jumping a fixed oriented distance along path(s) from the given position. if the position is not on a path, searches from the position to a path and adds/subtracts the search distance to the jump depending on the search direction. returns an empty vector if there is no path within the max search distance or if the jump distance goes past the end of the path
vector< handle_t > vg::algorithms::lazier_topological_order | ( | const HandleGraph * | g | ) |
Order the nodes in a graph using a topological sort. Similar to lazy_topological_order but somewhat faster. The algorithm is invalid in a graph that has any cycles or any reversing edges. For safety, consider these properties with algorithms::is_acyclic() and algorithms::is_single_stranded().
vector< handle_t > vg::algorithms::lazy_topological_order | ( | const HandleGraph * | g | ) |
Order the nodes in a graph using a topological sort. The sort is NOT guaranteed to be machine-independent, but it is faster than topological_order(). This algorithm is invalid in a graph that has any cycles. For safety, consider this property with algorithms::is_directed_acyclic().
vector<handle_t> vg::algorithms::lazy_topological_order_internal | ( | const HandleGraph * | g, |
bool | lazier | ||
) |
unordered_set< id_t > vg::algorithms::make_single_stranded | ( | MutableHandleGraph * | graph | ) |
Finds a set of node orientations that can be applied so that there are no reversing edges (i.e. every edge connects a locally forward node traversal to another locally forward orientation). If no such combination of orientations exists, produces an error and exits. Returns a set of the node IDs for nodes that were swapped in orientation. Potentially invalidates any existing handles.
void vg::algorithms::merge | ( | handlegraph::MutablePathDeletableHandleGraph * | graph, |
const vector< pair< handle_t, size_t >> & | start, | ||
size_t | length | ||
) |
Merge the given ranges of bases on the given handles together, rewriting paths. Sequences must match. Handles to a single node may occur no more than once.
size_t vg::algorithms::min_approx_path_distance | ( | const PathPositionHandleGraph * | graph, |
const pos_t & | pos1, | ||
const pos_t & | pos2, | ||
uint64_t | max_search | ||
) |
use the embedded paths to get an estimated minimum distance between the positions
unordered_map< path_handle_t, vector< pair< size_t, bool > > > vg::algorithms::multipath_alignment_path_offsets | ( | const PathPositionHandleGraph & | graph, |
const multipath_alignment_t & | mp_aln | ||
) |
Find the position of a multipath alignment on paths. Returns the lowest offset position on a path for each contiguous stretch of the multipath alignment, but multiple positions on the same path may be returned if the multipath alignment is disconnected or fans out toward the sources or sinks.
unordered_map< path_handle_t, vector< pair< size_t, bool > > > vg::algorithms::nearest_offsets_in_paths | ( | const PathPositionHandleGraph * | graph, |
const pos_t & | pos, | ||
int64_t | max_search | ||
) |
Return, for the nearest position in a path to the given position, subject to the given max search distance, a mapping from path name to all positions on each path where that pos_t occurs.
map< pos_t, char > vg::algorithms::next_pos_chars | ( | const PathPositionHandleGraph & | graph, |
pos_t | pos | ||
) |
void vg::algorithms::normalize | ( | handlegraph::MutablePathDeletableHandleGraph * | graph, |
int | max_iter = 1 , |
||
bool | debug = false |
||
) |
Normalize a graph, performing up to the given number of iterations. Simplifies siblings and unchops runs of nodes, in a loop.
map< string, vector< pair< size_t, bool > > > vg::algorithms::offsets_in_paths | ( | const PathPositionHandleGraph * | graph, |
const pos_t & | pos | ||
) |
Wrapper for the above to support some earlier code. Only looks for paths that directly touch the position, and returns the paths by name.
std::ostream & vg::algorithms::operator<< | ( | std::ostream & | out, |
const kmer_t & | kmer | ||
) |
Print a kmer_t to a stream.
std::ostream & vg::algorithms::operator<< | ( | std::ostream & | out, |
const walk_t & | walk | ||
) |
Print a walk_t to a stream.
unordered_set< id_t > vg::algorithms::orient_nodes_forward | ( | MutableHandleGraph * | g | ) |
Topologically sort the given handle graph, and then apply that sort to orient all the nodes in the global forward direction. May invalidate any paths stored by the graph. The re-orientation is guaranteed to be stable. Invalidates all handles into the graph (since any node might be flipped).
pair< double, double > vg::algorithms::packed_depth_of_bin | ( | const Packer & | packer, |
step_handle_t | start_step, | ||
step_handle_t | end_plus_one_step, | ||
size_t | min_coverage, | ||
bool | include_deletions | ||
) |
Estimate the coverage along a given reference path interval [start_step, end_plus_one_step) Coverage is obtained only from positions along the path, and variation is not counted Except if "include_deletions" is true, then reference path positions covered by a deletion edge (which is contained in the bin) will get the deletion edge's coverage counted. Other types of events (such as SNPs) can throw off coverage in similar ways but deletions tend to be bigger (and easier to find), so we hope that counting them is enough. If one wants to infer deletions from the coverage, obviously this should be false, but if looking for a background coverage for genotyping, then setting it to true may be helpful
void vg::algorithms::packed_depths | ( | const Packer & | packer, |
const string & | path_name, | ||
size_t | min_coverage, | ||
ostream & | out_stream | ||
) |
print path-name offset base-coverage for every base on a path (just like samtools depth) ignoring things below min_coverage. offsets are 1-based in output stream
nid_t vg::algorithms::parse_gfa_sequence_id | ( | const string & | str | ) |
std::string vg::algorithms::path_string | ( | const HandleGraph & | graph, |
const Path & | path | ||
) |
use the given graph and the path to determine our path string
string vg::algorithms::process_raw_gfa_path_name | ( | const string & | path_name_raw | ) |
void vg::algorithms::remove_high_degree_nodes | ( | DeletableHandleGraph & | g, |
int | max_degree | ||
) |
Remove nodes with >= max_degree total edges on each side. Note that end-to-start self loops count twice.
unordered_map< id_t, pair< id_t, bool > > vg::algorithms::reverse_complement_graph | ( | const HandleGraph * | source, |
MutableHandleGraph * | into | ||
) |
Fills a MutableHandleGraph 'into' with a graph that has the same sequence and path space as 'source', but the forward strand of every node is flipped to the reverse strand. Reports an error and exits if 'into' is not empty.
pair<double, double> vg::algorithms::sample_gam_depth | ( | const HandleGraph & | graph, |
const vector< Alignment > & | alignments, | ||
size_t | max_nodes, | ||
size_t | random_seed, | ||
size_t | min_coverage, | ||
size_t | min_mapq | ||
) |
pair< double, double > vg::algorithms::sample_mapping_depth | ( | const HandleGraph & | graph, |
const string & | input_filename, | ||
size_t | max_nodes, | ||
size_t | random_seed, | ||
size_t | min_coverage, | ||
size_t | min_mapq, | ||
const string & | format = "GAM" |
||
) |
Return the mean and variance of coverage of randomly sampled nodes from a mappings file Nodes with less than min_coverage are ignored The input_filename can be - for stdin The stream is scanned in parallel with all threads max_nodes is used to keep memory down valid formats are "GAM" and "GAF"
pair<double, double> vg::algorithms::sample_mapping_depth | ( | const HandleGraph & | graph, |
const vector< Alignment > & | alignments, | ||
size_t | max_nodes, | ||
size_t | random_seed, | ||
size_t | min_coverage, | ||
size_t | min_mapq | ||
) |
As above, but read a vector instead of a stream.
size_t vg::algorithms::shortest_cycle_length | ( | const HandleGraph * | graph | ) |
Returns the length of the shortest cycle in the entire graph, or numeric_limits<size_t>::max() if no cycle exists.
size_t vg::algorithms::shortest_cycle_length | ( | const HandleGraph * | graph, |
const handle_t & | source | ||
) |
Returns the length of the shortest cycle containing the source node, or numeric_limits<size_t>::max() if no cycle exists.
size_t vg::algorithms::shortest_cycle_length_internal | ( | const HandleGraph * | graph, |
const handle_t & | source, | ||
const vector< handle_t > & | layout, | ||
const unordered_map< handle_t, size_t > & | handle_index, | ||
const vector< pair< size_t, size_t >> & | feedback_edges | ||
) |
unordered_map< path_handle_t, vector< pair< size_t, bool > > > vg::algorithms::simple_offsets_in_paths | ( | const PathPositionHandleGraph * | graph, |
pos_t | pos | ||
) |
A "simple" model for path position getting for debugging.
bool vg::algorithms::simplify_siblings | ( | handlegraph::MutablePathDeletableHandleGraph * | graph | ) |
Simplify siblings in the given graph.
When one base has two successors with the same base value, and those successors have the same set of predecessors, the successors will be merged.
Performs only a subset of the possible merges. Can only merge in from one side of a given node in a single invocation. Returns true if it made progress and there may be more merging to do.
Preserves paths.
vector< handle_t > vg::algorithms::single_stranded_orientation | ( | const HandleGraph * | graph | ) |
Returns a vector of handles where the orientation of each handle indicates an orientation that could be used to convert the graph into a single-stranded graph. That is, if all of the reverse handles (or all of the forward handles) were swapped in orientation, the graph would contain no reversing edges. Returns an empty vector if there is no such combination of node orientations (also if graph has no nodes).
vector< pair< id_t, id_t > > vg::algorithms::sorted_id_ranges | ( | const HandleGraph * | graph | ) |
Get a sorted list of inclusive ranges of IDs used in the given HandleGraph.
unordered_map< id_t, pair< id_t, bool > > vg::algorithms::split_strands | ( | const HandleGraph * | source, |
MutableHandleGraph * | into | ||
) |
Fills a MutableHandleGraph 'into' with a graph that has the same sequence and path space as 'source', but all of the sequences are on the forward strand. This is accomplished by creating a new node for each node in the source graph with the reverse complement sequence. Returns a map that translates node IDs from 'into' to their node ID and orientation in 'source'. Reports an error and exits if 'into' is not empty.
vector< unordered_set< id_t > > vg::algorithms::strongly_connected_components | ( | const HandleGraph * | handle_graph | ) |
Identify strongly connected components.
vector< handle_t > vg::algorithms::tail_nodes | ( | const HandleGraph * | g | ) |
Find all of the nodes with no edges on their right sides.
void vg::algorithms::three_edge_connected_component_merges | ( | const function< void(const function< void(TECCNode)> &)> & | for_each_node, |
const function< void(TECCNode, const function< void(TECCNode)> &)> & | for_each_connected_node, | ||
const function< void(TECCNode, TECCNode)> & | same_component | ||
) |
Get the three-edge-connected components of an arbitrary graph (not necessarily a handle graph). Only recognizes one kind of edge and one kind of node. Nodes are arbitrary value types (which may need to be hashable).
Takes a function that loops an iteratee over all nodes, and a function that, given a node, loops an iteratee over all nodes connected to it.
Calls same_component with pairs of nodes in (at least) a spanning tree of the set of nodes in each component (not restricted to the input graph). Doing merge operations on a union-find can get you the set of components. The callback MUST NOT modify the graph!
If you have a graph where you can easily rank the nodes, don't use this. Use three_edge_connected_components_dense() instead. The first thing this function does is asign nodes a dense, 0-based rank space.
void vg::algorithms::three_edge_connected_component_merges_dense | ( | size_t | node_count, |
size_t | first_root, | ||
const function< void(size_t, const function< void(size_t)> &)> & | for_each_connected_node, | ||
const function< void(size_t, size_t)> & | same_component | ||
) |
Get the three-edge-connected components of an arbitrary graph (not necessarily a handle graph). Only recognizes one kind of edge and one kind of node. Nodes are dense positive integers starting with 0.
Takes a total node count, a suggested root (or 0), and a function that, given a node, loops an iteratee over all nodes connected to it.
Calls same_component with pairs of nodes in (at least) a spanning tree of the set of nodes in each component (not restricted to the input graph). Doing merge operations on a union-find can get you the set of components. The callback MUST NOT modify the graph!
This defines the data we track for each node in the graph
When in the DFS were we first visited?
When in the DFS were we last visited? Needed for finding replacement neighbors to implement path range absorption in part 1.3, when we're asked for a range to a neighbor that got eaten.
What is our "low point" in the search. This is the earliest dfs_counter for a node that this node or any node in its DFS subtree has a back-edge to.
What is the effective degree of this node in the graph with all the absorb-eject modifications applied?
What node has the continuation of this node's path? If equal to numeric_limits<number_t>::max(), the path ends after here. The node's path is the path from this node, into its DFS subtree, to (one of) the nodes in the subtree that has the back-edge that caused this node's low point to be so low. Basically a low point traceback.
Is this node actually on its own path? Nodes can be removed from their paths if those nodes don't matter any more (i.e. got absorbed) but their paths still need to be tails for other paths.
Has the node been visited yet? Must be 0. TODO: Move to its own vector to make zeroing them all free-ish with page table shenanigans.
Track the node that this stack frame represents
Track all the neighbors left to visit. When we visit a neighbor we pop it off the back.
When we look at the neighbors, we need to be able to tell the tree edge to the parent from further back edges to the parent. So we have a flag for whether we have seen the parent tree edge already, and the first neighbors entry that is our parent will get called the tree edge.
Track whether we made a recursive DFS call into the last neighbor or not. If we did, we need to do some work when we come out of it and return to this frame.
void vg::algorithms::three_edge_connected_components | ( | const function< void(const function< void(TECCNode)> &)> & | for_each_node, |
const function< void(TECCNode, const function< void(TECCNode)> &)> & | for_each_connected_node, | ||
const function< void(const function< void(const function< void(TECCNode)> &)> &)> & | component_callback | ||
) |
Get the three-edge-connected components of an arbitrary graph (not necessarily a handle graph). Only recognizes one kind of edge and one kind of node. Nodes are arbitrary value types (which may need to be hashable).
Takes a function that loops an iteratee over all nodes, and a function that, given a node, loops an iteratee over all nodes connected to it.
For each component identified, calls the given callback with a function that iterates over all nodes in the component.
If you have a graph where you can easily rank the nodes, don't use this. Use three_edge_connected_components_dense() instead. The first thing this function does is asign nodes a dense, 0-based rank space.
void vg::algorithms::three_edge_connected_components_dense | ( | size_t | node_count, |
size_t | first_root, | ||
const function< void(size_t, const function< void(size_t)> &)> & | for_each_connected_node, | ||
const function< void(const function< void(const function< void(size_t)> &)> &)> & | component_callback | ||
) |
Get the three-edge-connected components of an arbitrary graph (not necessarily a handle graph). Only recognizes one kind of edge and one kind of node. Nodes are dense positive integers starting with 0.
Takes a total node count, a suggested root (or 0), and a function that, given a node, loops an iteratee over all nodes connected to it.
For each component identified, calls the given callback with a function that iterates over all nodes in the component.
void vg::algorithms::three_edge_connected_components_dense_cactus | ( | size_t | node_count, |
const function< void(size_t, const function< void(size_t)> &)> & | for_each_connected_node, | ||
const function< void(const function< void(const function< void(size_t)> &)> &)> & | component_callback | ||
) |
Get the three-edge-connected components of an arbitrary graph (not necessarily a handle graph). Only recognizes one kind of edge and one kind of node. Nodes are dense positive integers starting with 0.
Wraps the known good the 3 edge connected components algorithm from the pinchesAndCacti library.
Takes a total node count, and a function that, given a node, loops an iteratee over all nodes connected to it.
For each component identified, calls the given callback with a function that iterates over all nodes in the component.
void vg::algorithms::to_gfa | ( | const PathHandleGraph & | graph, |
ostream & | out | ||
) |
vector< handle_t > vg::algorithms::topological_order | ( | const HandleGraph * | g | ) |
Order and orient the nodes in the graph using a topological sort. The sort is guaranteed to be machine-independent given the initial graph's node and edge ordering. The algorithm is well-defined on non-DAG graphs, but the order is necessarily not a topological order.
We use a bidirected adaptation of Kahn's topological sort (1962), which can handle components with no heads or tails.
L ← Empty list that will contain the sorted and oriented elements S ← Set of nodes which have been oriented, but which have not had their downstream edges examined N ← Set of all nodes that have not yet been put into S
while N is nonempty do remove a node from N, orient it arbitrarily, and add it to S (In practice, we use "seeds": the heads all in a batch at the start, and any nodes we have seen that had too many incoming edges) while S is non-empty do remove an oriented node n from S add n to tail of L for each node m with an edge e from n to m do remove edge e from the graph if m has no other edges to that side then orient m such that the side the edge comes to is first remove m from N insert m into S otherwise put an oriented m on the list of arbitrary places to start when S is empty (This helps start at natural entry points to cycles) return L (a topologically sorted order and orientation)
void vg::algorithms::unchop | ( | handlegraph::MutablePathDeletableHandleGraph * | graph | ) |
Unchop by gluing abutting handles with just a single edge between them and compatible path steps together.
void vg::algorithms::validate_gfa_edge | ( | const gfak::edge_elem & | e | ) |
uint64_t vg::algorithms::walk_haplotype_frequency | ( | const HandleGraph & | graph, |
const gbwt::GBWT & | haplotypes, | ||
const walk_t & | walk | ||
) |
vector< unordered_set< id_t > > vg::algorithms::weakly_connected_components | ( | const HandleGraph * | graph | ) |
Returns sets of IDs defining components that are connected by any series of nodes and edges, even if it is not a valid bidirected walk. TODO: It might make sense to have a handle-returning version, but the consumers of weakly connected components right now want IDs, and membership in a weakly connected component is orientation-independent.
vector< pair< unordered_set< id_t >, vector< handle_t > > > vg::algorithms::weakly_connected_components_with_tips | ( | const HandleGraph * | graph | ) |
Return pairs of weakly connected component ID sets and the handles that are their tips, oriented inward. If a node is both a head and a tail, it will appear in tips in both orientations.
pair< double, vector< handle_t > > vg::algorithms::widest_dijkstra | ( | const HandleGraph * | g, |
handle_t | source, | ||
handle_t | sink, | ||
function< double(const handle_t &)> | node_weight_callback, | ||
function< double(const edge_t &)> | edge_weight_callback, | ||
function< bool(const handle_t &)> | is_node_ignored_callback, | ||
function< bool(const edge_t &)> | is_edge_ignored_callbback, | ||
bool | greedy_avg = false |
||
) |
This Dijkstra is the same underlying algorithm as the one in dijkstra.hpp but the interface is different enough that I opted to make it a seprate thing rather than add loads of optional arguments. The key differences are these generalizations: – looks for the "widest" path (maximum minimum weight) instead of shortest – counts node and edge weights (via callbakcs) – returns the path as well as the score – option for ignoring certain nodes and edges in search (required by Yen's algorithm) – greedy_avg option switches the algorithm to a heuristic (no optimal guarantee) search using the running averages support instead of min-flow support as objective function.
vector< pair< double, vector< handle_t > > > vg::algorithms::yens_k_widest_paths | ( | const HandleGraph * | g, |
handle_t | source, | ||
handle_t | sink, | ||
size_t | K, | ||
function< double(const handle_t &)> | node_weight_callback, | ||
function< double(const edge_t &)> | edge_weight_callback, | ||
bool | greedy_avg | ||
) |
Find the k widest paths.