vg
tools for working with variation graphs
|
#include <hash_graph.hpp>
Classes | |
struct | node_t |
struct | path_mapping_t |
class | path_t |
Public Member Functions | |
HashGraph () | |
~HashGraph () | |
HashGraph (istream &in) | |
Deserialize from a stream of data. More... | |
bool | has_node (nid_t node_id) const |
Method to check if a node exists by ID. More... | |
handle_t | get_handle (const nid_t &node_id, bool is_reverse=false) const |
Look up the handle for the node with the given ID in the given orientation. More... | |
nid_t | get_id (const handle_t &handle) const |
Get the ID from a handle. More... | |
bool | get_is_reverse (const handle_t &handle) const |
Get the orientation of a handle. More... | |
handle_t | flip (const handle_t &handle) const |
Invert the orientation of a handle (potentially without getting its ID) More... | |
size_t | get_length (const handle_t &handle) const |
Get the length of a node. More... | |
string | get_sequence (const handle_t &handle) const |
Get the sequence of a node, presented in the handle's local forward orientation. More... | |
bool | follow_edges_impl (const handle_t &handle, bool go_left, const std::function< bool(const handle_t &)> &iteratee) const |
bool | for_each_handle_impl (const std::function< bool(const handle_t &)> &iteratee, bool parallel=false) const |
size_t | get_node_count (void) const |
nid_t | min_node_id (void) const |
nid_t | max_node_id (void) const |
size_t | get_degree (const handle_t &handle, bool go_left) const |
Efficiently get the number of edges attached to one side of a handle. More... | |
char | get_base (const handle_t &handle, size_t index) const |
string | get_subsequence (const handle_t &handle, size_t index, size_t size) const |
handle_t | create_handle (const std::string &sequence) |
handle_t | create_handle (const std::string &sequence, const nid_t &id) |
void | destroy_handle (const handle_t &handle) |
void | create_edge (const handle_t &left, const handle_t &right) |
void | destroy_edge (const handle_t &left, const handle_t &right) |
void | clear (void) |
Remove all nodes and edges. Does not update any stored paths. More... | |
handle_t | apply_orientation (const handle_t &handle) |
vector< handle_t > | divide_handle (const handle_t &handle, const std::vector< size_t > &offsets) |
void | optimize (bool allow_id_reassignment=true) |
void | apply_ordering (const vector< handle_t > &order, bool compact_ids=false) |
size_t | get_path_count () const |
Returns the number of paths stored in the graph. More... | |
bool | has_path (const std::string &path_name) const |
Determine if a path name exists and is legal to get a path handle for. More... | |
path_handle_t | get_path_handle (const std::string &path_name) const |
string | get_path_name (const path_handle_t &path_handle) const |
Look up the name of a path from a handle to it. More... | |
bool | get_is_circular (const path_handle_t &path_handle) const |
Look up whether a path is circular. More... | |
size_t | get_step_count (const path_handle_t &path_handle) const |
Returns the number of node steps in the path. More... | |
handle_t | get_handle_of_step (const step_handle_t &step_handle) const |
Get a node handle (node ID and orientation) from a handle to a step on a path. More... | |
path_handle_t | get_path_handle_of_step (const step_handle_t &step_handle) const |
Returns a handle to the path that a step is on. More... | |
step_handle_t | path_begin (const path_handle_t &path_handle) const |
step_handle_t | path_end (const path_handle_t &path_handle) const |
step_handle_t | path_back (const path_handle_t &path_handle) const |
step_handle_t | path_front_end (const path_handle_t &path_handle) const |
bool | has_next_step (const step_handle_t &step_handle) const |
Returns true if the step is not the last step in a non-circular path. More... | |
bool | has_previous_step (const step_handle_t &step_handle) const |
Returns true if the step is not the first step in a non-circular path. More... | |
step_handle_t | get_next_step (const step_handle_t &step_handle) const |
step_handle_t | get_previous_step (const step_handle_t &step_handle) const |
bool | for_each_path_handle_impl (const std::function< bool(const path_handle_t &)> &iteratee) const |
Execute a function on each path in the graph. More... | |
bool | for_each_step_on_handle_impl (const handle_t &handle, const function< bool(const step_handle_t &)> &iteratee) const |
Calls a function with all steps of a node on paths. More... | |
void | destroy_path (const path_handle_t &path) |
path_handle_t | create_path_handle (const string &name, bool is_circular=false) |
step_handle_t | append_step (const path_handle_t &path, const handle_t &to_append) |
step_handle_t | prepend_step (const path_handle_t &path, const handle_t &to_prepend) |
pair< step_handle_t, step_handle_t > | rewrite_segment (const step_handle_t &segment_begin, const step_handle_t &segment_end, const std::vector< handle_t > &new_segment) |
void | set_circularity (const path_handle_t &path, bool circular) |
void | set_id_increment (const nid_t &min_id) |
void | increment_node_ids (nid_t increment) |
void | reassign_node_ids (const std::function< nid_t(const nid_t &)> &get_new_id) |
uint32_t | get_magic_number () const |
Returns a static high-entropy number to indicate the class. More... | |
![]() | |
virtual | ~MutablePathDeletableHandleGraph ()=default |
![]() | |
virtual | ~MutablePathMutableHandleGraph ()=default |
![]() | |
virtual | ~MutablePathHandleGraph ()=default |
virtual path_handle_t | create_path_handle (const std::string &name, bool is_circular=false)=0 |
![]() | |
virtual | ~PathHandleGraph ()=default |
template<typename Iteratee > | |
bool | for_each_path_handle (const Iteratee &iteratee) const |
template<typename Iteratee > | |
bool | for_each_step_on_handle (const handle_t &handle, const Iteratee &iteratee) const |
virtual std::vector< step_handle_t > | steps_of_handle (const handle_t &handle, bool match_orientation=false) const |
virtual bool | is_empty (const path_handle_t &path_handle) const |
Returns true if the given path is empty, and false otherwise. More... | |
PathForEachSocket | scan_path (const path_handle_t &path) const |
template<typename Iteratee > | |
bool | for_each_step_in_path (const path_handle_t &path, const Iteratee &iteratee) const |
![]() | |
virtual | ~HandleGraph ()=default |
template<typename Iteratee > | |
bool | follow_edges (const handle_t &handle, bool go_left, const Iteratee &iteratee) const |
template<typename Iteratee > | |
bool | for_each_handle (const Iteratee &iteratee, bool parallel=false) const |
virtual bool | has_edge (const handle_t &left, const handle_t &right) const |
bool | has_edge (const edge_t &edge) const |
Convenient wrapper of has_edge for edge_t argument. More... | |
virtual size_t | get_edge_count () const |
virtual size_t | get_total_length () const |
handle_t | forward (const handle_t &handle) const |
Get the locally forward version of a handle. More... | |
edge_t | edge_handle (const handle_t &left, const handle_t &right) const |
handle_t | traverse_edge_handle (const edge_t &edge, const handle_t &left) const |
template<typename Iteratee > | |
bool | for_each_edge (const Iteratee &iteratee, bool parallel=false) const |
![]() | |
virtual | ~MutableHandleGraph ()=default |
void | create_edge (const edge_t &edge) |
Convenient wrapper for create_edge. More... | |
std::pair< handle_t, handle_t > | divide_handle (const handle_t &handle, size_t offset) |
Specialization of divide_handle for a single division point. More... | |
virtual void | apply_ordering (const std::vector< handle_t > &order, bool compact_ids=false)=0 |
![]() | |
virtual | ~DeletableHandleGraph ()=default |
void | destroy_edge (const edge_t &edge) |
Convenient wrapper for destroy_edge. More... | |
![]() | |
virtual | ~SerializableHandleGraph ()=default |
void | serialize (std::ostream &out) const |
void | serialize (const std::string &filename) const |
void | deserialize (std::istream &in) |
void | deserialize (const std::string &filename) |
Private Member Functions | |
void | serialize_members (ostream &out) const |
Write the graph to an out stream (called from the inherited 'serialize' method) More... | |
void | deserialize_members (istream &in) |
Read the graph from an in stream (called from the inherited 'deserialize' method) More... | |
Static Private Member Functions | |
static handle_t | set_id (const handle_t &internal, nid_t new_id) |
Replace the ID in a handle with a different number. More... | |
Private Attributes | |
nid_t | max_id = 0 |
The maximum ID in the graph. More... | |
nid_t | min_id = numeric_limits<nid_t>::max() |
The minimum ID in the graph. More... | |
hash_map< nid_t, node_t > | graph |
Encodes the graph topology. More... | |
string_hash_map< string, int64_t > | path_id |
Maps path names to path IDs. More... | |
hash_map< int64_t, path_t > | paths |
Maps path IDs to the actual paths. More... | |
int64_t | next_path_id = 1 |
The next path ID we will assign to a new path. More... | |
Additional Inherited Members | |
![]() | |
virtual bool | for_each_step_on_handle_impl (const handle_t &handle, const std::function< bool(const step_handle_t &)> &iteratee) const =0 |
HashGraph is a HandleGraph implementation designed for simplicity. Nodes are plain C++ objects stored in a hash map, which contain C++ vectors representing their adjacencies.
HashGraph is a good choice when fast access to or modification of a graph is required, but can use more memory than other graph implementations.
bdsg::HashGraph::HashGraph | ( | ) |
bdsg::HashGraph::~HashGraph | ( | ) |
bdsg::HashGraph::HashGraph | ( | istream & | in | ) |
Deserialize from a stream of data.
|
virtual |
Append a visit to a node to the given path. Returns a handle to the new final step on the path which is appended. If the path is cirular, the new step is placed between the steps considered "last" and "first" by the method path_begin. Handles to prior steps on the path, and to other paths, must remain valid.
Implements handlegraph::MutablePathHandleGraph.
void bdsg::HashGraph::apply_ordering | ( | const vector< handle_t > & | order, |
bool | compact_ids = false |
||
) |
Reorder the graph's internal structure to match that given. This sets the order that is used for iteration in functions like for_each_handle. Optionally compact the id space of the graph to match the ordering, from 1->|ordering|. This may be a no-op in the case of graph implementations that do not have any mechanism to maintain an ordering.
Alter the node that the given handle corresponds to so the orientation indicated by the handle becomes the node's local forward orientation. Rewrites all edges pointing to the node and the node's sequence to reflect this. Invalidates all handles to the node (including the one passed). Returns a new, valid handle to the node in its new forward orientation. Note that it is possible for the node's ID to change. Does not update any stored paths. May change the ordering of the underlying graph.
Implements handlegraph::MutableHandleGraph.
|
virtual |
Remove all nodes and edges. Does not update any stored paths.
Implements handlegraph::DeletableHandleGraph.
Create an edge connecting the given handles in the given order and orientations. Ignores existing edges.
Implements handlegraph::MutableHandleGraph.
|
virtual |
Create a new node with the given sequence and return the handle. The sequence may not be empty.
Implements handlegraph::MutableHandleGraph.
Create a new node with the given id and sequence, then return the handle. The sequence may not be empty. The ID must be strictly greater than 0.
Implements handlegraph::MutableHandleGraph.
path_handle_t bdsg::HashGraph::create_path_handle | ( | const string & | name, |
bool | is_circular = false |
||
) |
Create a path with the given name. The caller must ensure that no path with the given name exists already, or the behavior is undefined. Returns a handle to the created empty path. Handles to other paths must remain valid.
|
privatevirtual |
Read the graph from an in stream (called from the inherited 'deserialize' method)
Implements handlegraph::SerializableHandleGraph.
Remove the edge connecting the given handles in the given order and orientations. Ignores nonexistent edges. Does not update any stored paths.
Implements handlegraph::DeletableHandleGraph.
|
virtual |
Remove the node belonging to the given handle and all of its edges. Destroys any paths in which the node participates. Invalidates the destroyed handle. May be called during serial for_each_handle iteration ONLY on the node being iterated. May NOT be called during parallel for_each_handle iteration. May NOT be called on the node from which edges are being followed during follow_edges. May NOT be called during iteration over paths, if it would destroy a path. May NOT be called during iteration along a path, if it would destroy that path.
Implements handlegraph::DeletableHandleGraph.
|
virtual |
Destroy the given path. Invalidates handles to the path and its node steps.
Implements handlegraph::MutablePathHandleGraph.
|
virtual |
Split a handle's underlying node at the given offsets in the handle's orientation. Returns all of the handles to the parts. Other handles to the node being split may be invalidated. The split pieces stay in the same local forward orientation as the original node, but the returned handles come in the order and orientation appropriate for the handle passed in. Updates stored paths.
Implements handlegraph::MutableHandleGraph.
Invert the orientation of a handle (potentially without getting its ID)
Implements handlegraph::HandleGraph.
|
virtual |
Loop over all the handles to next/previous (right/left) nodes. Passes them to a callback which returns false to stop iterating and true to continue. Returns true if we finished and false if we stopped early.
Implements handlegraph::HandleGraph.
|
virtual |
Loop over all the nodes in the graph in their local forward orientations, in their internal stored order. Stop if the iteratee returns false. Can be told to run in parallel, in which case stopping after a false return value is on a best-effort basis and iteration order is not defined.
Implements handlegraph::HandleGraph.
|
virtual |
Execute a function on each path in the graph.
Implements handlegraph::PathHandleGraph.
bool bdsg::HashGraph::for_each_step_on_handle_impl | ( | const handle_t & | handle, |
const function< bool(const step_handle_t &)> & | iteratee | ||
) | const |
Calls a function with all steps of a node on paths.
|
virtual |
Returns one base of a handle's sequence, in the orientation of the handle.
Reimplemented from handlegraph::HandleGraph.
|
virtual |
Efficiently get the number of edges attached to one side of a handle.
Reimplemented from handlegraph::HandleGraph.
|
virtual |
Look up the handle for the node with the given ID in the given orientation.
Implements handlegraph::HandleGraph.
|
virtual |
Get a node handle (node ID and orientation) from a handle to a step on a path.
Implements handlegraph::PathHandleGraph.
Get the ID from a handle.
Implements handlegraph::HandleGraph.
|
virtual |
Look up whether a path is circular.
Implements handlegraph::PathHandleGraph.
|
virtual |
Get the orientation of a handle.
Implements handlegraph::HandleGraph.
|
virtual |
Get the length of a node.
Implements handlegraph::HandleGraph.
|
virtual |
Returns a static high-entropy number to indicate the class.
Implements handlegraph::SerializableHandleGraph.
|
virtual |
Returns a handle to the next step on the path. If the given step is the final step of a non-circular path, returns the past-the-last step that is also returned by path_end. In a circular path, the "last" step will loop around to the "first" (i.e. the one returned by path_begin). Note: to iterate over each step one time, even in a circular path, consider for_each_step_in_path.
Implements handlegraph::PathHandleGraph.
|
virtual |
Return the number of nodes in the graph TODO: can't be node_count because XG has a field named node_count.
Implements handlegraph::HandleGraph.
|
virtual |
Returns the number of paths stored in the graph.
Implements handlegraph::PathHandleGraph.
|
virtual |
Look up the path handle for the given path name. The path with that name must exist.
Implements handlegraph::PathHandleGraph.
|
virtual |
Returns a handle to the path that a step is on.
Implements handlegraph::PathHandleGraph.
|
virtual |
Look up the name of a path from a handle to it.
Implements handlegraph::PathHandleGraph.
|
virtual |
Returns a handle to the previous step on the path. If the given step is the first step of a non-circular path, this method has undefined behavior. In a circular path, it will loop around from the "first" step (i.e. the one returned by path_begin) to the "last" step. Note: to iterate over each step one time, even in a circular path, consider for_each_step_in_path.
Implements handlegraph::PathHandleGraph.
|
virtual |
Get the sequence of a node, presented in the handle's local forward orientation.
Implements handlegraph::HandleGraph.
|
virtual |
Returns the number of node steps in the path.
Implements handlegraph::PathHandleGraph.
|
virtual |
Returns a substring of a handle's sequence, in the orientation of the handle. If the indicated substring would extend beyond the end of the handle's sequence, the return value is truncated to the sequence's end.
Reimplemented from handlegraph::HandleGraph.
|
virtual |
Returns true if the step is not the last step in a non-circular path.
Implements handlegraph::PathHandleGraph.
|
virtual |
Method to check if a node exists by ID.
Implements handlegraph::HandleGraph.
|
virtual |
Determine if a path name exists and is legal to get a path handle for.
Implements handlegraph::PathHandleGraph.
|
virtual |
Returns true if the step is not the first step in a non-circular path.
Implements handlegraph::PathHandleGraph.
|
virtual |
Add the given value to all node IDs
Reimplemented from handlegraph::MutableHandleGraph.
|
virtual |
Return the largest ID in the graph, or some larger number if the largest ID is unavailable. Return value is unspecified if the graph is empty.
Implements handlegraph::HandleGraph.
|
virtual |
Return the smallest ID in the graph, or some smaller number if the smallest ID is unavailable. Return value is unspecified if the graph is empty.
Implements handlegraph::HandleGraph.
|
virtual |
Adjust the representation of the graph in memory to improve performance. Optionally, allow the node IDs to be reassigned to further improve performance. Note: Ideally, this method is called one time once there is expected to be few graph modifications in the future.
tighten up the memory allocated to the vectors in the data structure
Implements handlegraph::MutableHandleGraph.
|
virtual |
Get a handle to the last step, which will be an arbitrary step in a circular path that we consider "last" based on our construction of the path. If the path is empty then the implementation must return the same value as path_front_end().
Implements handlegraph::PathHandleGraph.
|
virtual |
Get a handle to the first step, or in a circular path to an arbitrary step considered "first". If the path is empty, returns the past-the-last step returned by path_end.
Implements handlegraph::PathHandleGraph.
|
virtual |
Get a handle to a fictitious position past the end of a path. This position is return by get_next_step for the final step in a path in a non-circular path. Note that get_next_step will NEVER return this value for a circular path.
Implements handlegraph::PathHandleGraph.
|
virtual |
Get a handle to a fictitious position before the beginning of a path. This position is return by get_previous_step for the first step in a path in a non-circular path. Note: get_previous_step will NEVER return this value for a circular path.
Implements handlegraph::PathHandleGraph.
|
virtual |
Prepend a visit to a node to the given path. Returns a handle to the new first step on the path which is appended. If the path is cirular, the new step is placed between the steps considered "last" and "first" by the method path_begin. Handles to later steps on the path, and to other paths, must remain valid.
Implements handlegraph::MutablePathHandleGraph.
|
virtual |
Reassign all node IDs as specified by the old->new mapping function.
Implements handlegraph::MutableHandleGraph.
|
virtual |
Delete a segment of a path and rewrite it as some other sequence of steps. Returns a pair of step_handle_t's that indicate the range of the new segment in the path. The segment to delete should be designated by the first (begin) and past-last (end) step handles. If the step that is returned by path_begin is deleted, path_begin will now return the first step from the new segment or, in the case that the new segment is empty, the step used as segment_end. Empty ranges consist of two copies of the same step handle. Empty ranges in empty paths consist of two copies of the end sentinel handle for the path. Rewriting an empty range inserts before the provided end handle.
Implements handlegraph::MutablePathHandleGraph.
|
privatevirtual |
Write the graph to an out stream (called from the inherited 'serialize' method)
Implements handlegraph::SerializableHandleGraph.
|
virtual |
Make a path circular or non-circular. If the path is becoming circular, the last step is joined to the first step. If the path is becoming linear, the step considered "last" is unjoined from the step considered "first" according to the method path_begin.
Implements handlegraph::MutablePathHandleGraph.
Replace the ID in a handle with a different number.
|
virtual |
Set a minimum id to increment the id space by, used as a hint during construction. May have no effect on a backing implementation.
Implements handlegraph::MutableHandleGraph.
|
private |
The maximum ID in the graph.
|
private |
The next path ID we will assign to a new path.
|
private |
Maps path names to path IDs.
|
private |
Maps path IDs to the actual paths.