vg
tools for working with variation graphs
Classes | Public Member Functions | Private Member Functions | Private Attributes | Static Private Attributes | List of all members
bdsg::PackedGraph Class Reference

#include <packed_graph.hpp>

Inheritance diagram for bdsg::PackedGraph:
handlegraph::MutablePathDeletableHandleGraph handlegraph::SerializableHandleGraph handlegraph::MutablePathMutableHandleGraph handlegraph::DeletableHandleGraph handlegraph::MutablePathHandleGraph handlegraph::MutableHandleGraph handlegraph::MutableHandleGraph handlegraph::PathHandleGraph handlegraph::HandleGraph handlegraph::HandleGraph handlegraph::HandleGraph

Classes

struct  PackedPath
 

Public Member Functions

 PackedGraph ()
 
 ~PackedGraph ()
 
 PackedGraph (istream &in)
 Construct from a stream. More...
 
bool has_node (nid_t node_id) const
 Method to check if a node exists by ID. More...
 
handle_t get_handle (const nid_t &node_id, bool is_reverse=false) const
 Look up the handle for the node with the given ID in the given orientation. More...
 
nid_t get_id (const handle_t &handle) const
 Get the ID from a handle. More...
 
bool get_is_reverse (const handle_t &handle) const
 Get the orientation of a handle. More...
 
handle_t flip (const handle_t &handle) const
 Invert the orientation of a handle (potentially without getting its ID) More...
 
size_t get_length (const handle_t &handle) const
 Get the length of a node. More...
 
string get_sequence (const handle_t &handle) const
 Get the sequence of a node, presented in the handle's local forward orientation. More...
 
bool follow_edges_impl (const handle_t &handle, bool go_left, const std::function< bool(const handle_t &)> &iteratee) const
 
bool for_each_handle_impl (const std::function< bool(const handle_t &)> &iteratee, bool parallel=false) const
 
size_t get_edge_count () const
 
size_t get_total_length () const
 
char get_base (const handle_t &handle, size_t index) const
 
std::string get_subsequence (const handle_t &handle, size_t index, size_t size) const
 
size_t get_node_count (void) const
 Return the number of nodes in the graph. More...
 
nid_t min_node_id (void) const
 
nid_t max_node_id (void) const
 
handle_t create_handle (const std::string &sequence)
 
handle_t create_handle (const std::string &sequence, const nid_t &id)
 
void destroy_handle (const handle_t &handle)
 
void create_edge (const handle_t &left, const handle_t &right)
 
void destroy_edge (const handle_t &left, const handle_t &right)
 
void clear (void)
 Remove all nodes and edges. Does not update any stored paths. More...
 
handle_t apply_orientation (const handle_t &handle)
 
vector< handle_tdivide_handle (const handle_t &handle, const std::vector< size_t > &offsets)
 
void optimize (bool allow_id_reassignment=true)
 
void apply_ordering (const vector< handle_t > &order, bool compact_ids=false)
 
size_t get_path_count () const
 Returns the number of paths stored in the graph. More...
 
bool has_path (const std::string &path_name) const
 Determine if a path name exists and is legal to get a path handle for. More...
 
path_handle_t get_path_handle (const std::string &path_name) const
 
string get_path_name (const path_handle_t &path_handle) const
 Look up the name of a path from a handle to it. More...
 
bool get_is_circular (const path_handle_t &path_handle) const
 Look up whether a path is circular. More...
 
size_t get_step_count (const path_handle_t &path_handle) const
 Returns the number of node steps in the path. More...
 
handle_t get_handle_of_step (const step_handle_t &step_handle) const
 Get a node handle (node ID and orientation) from a handle to an step on a path. More...
 
step_handle_t path_begin (const path_handle_t &path_handle) const
 
step_handle_t path_end (const path_handle_t &path_handle) const
 
step_handle_t path_back (const path_handle_t &path_handle) const
 
step_handle_t path_front_end (const path_handle_t &path_handle) const
 
bool has_next_step (const step_handle_t &step_handle) const
 Returns true if the step is not the last step in a non-circular path. More...
 
bool has_previous_step (const step_handle_t &step_handle) const
 Returns true if the step is not the first step in a non-circular path. More...
 
step_handle_t get_next_step (const step_handle_t &step_handle) const
 
step_handle_t get_previous_step (const step_handle_t &step_handle) const
 
path_handle_t get_path_handle_of_step (const step_handle_t &step_handle) const
 Returns a handle to the path that an step is on. More...
 
bool for_each_path_handle_impl (const std::function< bool(const path_handle_t &)> &iteratee) const
 Execute a function on each path in the graph. More...
 
bool for_each_step_on_handle_impl (const handle_t &handle, const function< bool(const step_handle_t &)> &iteratee) const
 Calls the given function for each step of the given handle on a path. More...
 
void destroy_path (const path_handle_t &path)
 
path_handle_t create_path_handle (const string &name, bool is_circular=false)
 
step_handle_t append_step (const path_handle_t &path, const handle_t &to_append)
 
step_handle_t prepend_step (const path_handle_t &path, const handle_t &to_prepend)
 
pair< step_handle_t, step_handle_trewrite_segment (const step_handle_t &segment_begin, const step_handle_t &segment_end, const vector< handle_t > &new_segment)
 
void set_circularity (const path_handle_t &path, bool circular)
 
void set_id_increment (const nid_t &min_id)
 
void increment_node_ids (nid_t increment)
 
void reassign_node_ids (const std::function< nid_t(const nid_t &)> &get_new_id)
 
uint32_t get_magic_number () const
 Returns a static high-entropy number to indicate the class. More...
 
void report_memory (ostream &out, bool individual_paths=false) const
 
- Public Member Functions inherited from handlegraph::MutablePathDeletableHandleGraph
virtual ~MutablePathDeletableHandleGraph ()=default
 
- Public Member Functions inherited from handlegraph::MutablePathMutableHandleGraph
virtual ~MutablePathMutableHandleGraph ()=default
 
- Public Member Functions inherited from handlegraph::MutablePathHandleGraph
virtual ~MutablePathHandleGraph ()=default
 
virtual path_handle_t create_path_handle (const std::string &name, bool is_circular=false)=0
 
virtual std::pair< step_handle_t, step_handle_trewrite_segment (const step_handle_t &segment_begin, const step_handle_t &segment_end, const std::vector< handle_t > &new_segment)=0
 
- Public Member Functions inherited from handlegraph::PathHandleGraph
virtual ~PathHandleGraph ()=default
 
template<typename Iteratee >
bool for_each_path_handle (const Iteratee &iteratee) const
 
template<typename Iteratee >
bool for_each_step_on_handle (const handle_t &handle, const Iteratee &iteratee) const
 
virtual std::vector< step_handle_tsteps_of_handle (const handle_t &handle, bool match_orientation=false) const
 
virtual bool is_empty (const path_handle_t &path_handle) const
 Returns true if the given path is empty, and false otherwise. More...
 
PathForEachSocket scan_path (const path_handle_t &path) const
 
template<typename Iteratee >
bool for_each_step_in_path (const path_handle_t &path, const Iteratee &iteratee) const
 
- Public Member Functions inherited from handlegraph::HandleGraph
virtual ~HandleGraph ()=default
 
template<typename Iteratee >
bool follow_edges (const handle_t &handle, bool go_left, const Iteratee &iteratee) const
 
template<typename Iteratee >
bool for_each_handle (const Iteratee &iteratee, bool parallel=false) const
 
virtual size_t get_degree (const handle_t &handle, bool go_left) const
 
virtual bool has_edge (const handle_t &left, const handle_t &right) const
 
bool has_edge (const edge_t &edge) const
 Convenient wrapper of has_edge for edge_t argument. More...
 
handle_t forward (const handle_t &handle) const
 Get the locally forward version of a handle. More...
 
edge_t edge_handle (const handle_t &left, const handle_t &right) const
 
handle_t traverse_edge_handle (const edge_t &edge, const handle_t &left) const
 
template<typename Iteratee >
bool for_each_edge (const Iteratee &iteratee, bool parallel=false) const
 
- Public Member Functions inherited from handlegraph::MutableHandleGraph
virtual ~MutableHandleGraph ()=default
 
void create_edge (const edge_t &edge)
 Convenient wrapper for create_edge. More...
 
std::pair< handle_t, handle_tdivide_handle (const handle_t &handle, size_t offset)
 Specialization of divide_handle for a single division point. More...
 
virtual void apply_ordering (const std::vector< handle_t > &order, bool compact_ids=false)=0
 
- Public Member Functions inherited from handlegraph::DeletableHandleGraph
virtual ~DeletableHandleGraph ()=default
 
void destroy_edge (const edge_t &edge)
 Convenient wrapper for destroy_edge. More...
 
- Public Member Functions inherited from handlegraph::SerializableHandleGraph
virtual ~SerializableHandleGraph ()=default
 
void serialize (std::ostream &out) const
 
void serialize (const std::string &filename) const
 
void deserialize (std::istream &in)
 
void deserialize (const std::string &filename)
 

Private Member Functions

void serialize_members (ostream &out) const
 Write the graph to an out stream (called from the inherited 'serialize' method) More...
 
void deserialize_members (istream &in)
 Read the graph from an in stream (called from the inherited 'deserialize' method) More...
 
void tighten (void)
 
void compact_ids (const vector< handle_t > &order)
 
size_t new_node_record (nid_t node_id)
 
void remove_edge_reference (const handle_t &on, const handle_t &to)
 Find and edge on given handle, to a given handle, and remove it from the edge list. More...
 
void eject_deleted_paths ()
 If we've deleted any paths, remove them from the paths vector and reassign path IDs. More...
 
void defragment (bool force=false)
 
void defragment_path (const int64_t &path_idx, bool force=false)
 
PackedVector encode_and_assign_path_name (const string &path_name)
 Convert a path name into an integer vector, assigning new chars as necessary. More...
 
PackedVector encode_path_name (const string &path_name) const
 
void append_path_name (const string &path_name)
 
string decode_path_name (const int64_t &path_idx) const
 Decode the internal representation of a path name and return it as a string. More...
 
PackedVector extract_encoded_path_name (const int64_t &path_idx) const
 Extract the internal representation of a path name, but do not decode it. More...
 
uint64_t encode_nucleotide (const char &nt) const
 Convenience functions to translate between encodings in the vectors. More...
 
char decode_nucleotide (const uint64_t &val) const
 Map [0, 4] to nucleotides. More...
 
uint64_t complement_encoded_nucleotide (const uint64_t &val) const
 Complement nucleotide encoded as [0, 4]. More...
 
uint64_t get_assignment (const char &c) const
 
uint64_t get_or_make_assignment (const char &c)
 Get the integer assignment of a char, assigning a new one if necessary. More...
 
char get_char (const uint64_t &assignment) const
 Get the char assigned to an integer (must be already assigned) More...
 
size_t graph_iv_index (const handle_t &handle) const
 
uint64_t graph_index_to_seq_len_index (const size_t &graph_index) const
 
uint64_t graph_index_to_seq_start_index (const size_t &graph_index) const
 
uint64_t graph_index_to_node_member_index (const size_t &graph_index) const
 
const uint64_t & encode_traversal (const handle_t &handle) const
 
const handle_tdecode_traversal (const uint64_t &val) const
 
uint64_t get_next_edge_index (const uint64_t &edge_index) const
 
uint64_t get_edge_target (const uint64_t &edge_index) const
 
void set_edge_target (const uint64_t &edge_index, const handle_t &handle)
 
uint64_t get_next_membership (const uint64_t &membership_index) const
 
uint64_t get_membership_step (const uint64_t &membership_index) const
 
uint64_t get_membership_path (const uint64_t &membership_index) const
 
void set_next_membership (const uint64_t &membership_index, const uint64_t &next)
 
void set_membership_step (const uint64_t &membership_index, const uint64_t &step)
 
void set_membership_path (const uint64_t &membership_index, const uint64_t &path)
 
uint64_t get_step_trav (const PackedPath &path, const uint64_t &step_index) const
 
uint64_t get_step_prev (const PackedPath &path, const uint64_t &step_index) const
 
uint64_t get_step_next (const PackedPath &path, const uint64_t &step_index) const
 
void set_step_trav (PackedPath &path, const uint64_t &step_index, const uint64_t &trav)
 
void set_step_prev (PackedPath &path, const uint64_t &step_index, const uint64_t &prev_index)
 
void set_step_next (PackedPath &path, const uint64_t &step_index, const uint64_t &next_index)
 

Private Attributes

nid_t max_id = 0
 The maximum ID in the graph. More...
 
nid_t min_id = std::numeric_limits<nid_t>::max()
 The minimum ID in the graph. More...
 
PagedVector graph_iv
 
PagedVector seq_start_iv
 Encodes the start of a node's sequence in seq_iv. Matches the order of graph_iv. More...
 
PackedVector seq_length_iv
 Encodes the length of a node's sequence in seq_iv. Matches the order of graph_iv. More...
 
PagedVector edge_lists_iv
 
PackedDeque nid_to_graph_iv
 
PackedVector seq_iv
 Encodes all of the sequences of all nodes in the graph. More...
 
PagedVector path_membership_node_iv
 
PagedVector path_membership_id_iv
 
PagedVector path_membership_offset_iv
 1-based offset of the occurrence of the node in the corresponding PackedPath vector. More...
 
PagedVector path_membership_next_iv
 
hash_map< char, uint64_t > char_assignment
 We will reassign char values from the path names to small integers. More...
 
string inverse_char_assignment
 The inverse mapping from integer to the char value. More...
 
PackedVector path_names_iv
 
PagedVector path_name_start_iv
 
PackedVector path_name_length_iv
 The length of the path's name for the path with the same index in paths. More...
 
PackedVector path_is_deleted_iv
 Bit-vector that marks whether the path at the same index has been deleted. More...
 
PackedVector path_is_circular_iv
 Bit-vector that marks whether the path at the same index is circular. More...
 
PagedVector path_head_iv
 
PagedVector path_tail_iv
 
PackedVector path_deleted_steps_iv
 The number of steps that have have deleted from the path at the same index. More...
 
string_hash_map< PackedVector, int64_t > path_id
 Map from path names to index in the paths vector. More...
 
vector< PackedPathpaths
 Vector of the embedded paths in the graph. More...
 
uint64_t deleted_node_records = 0
 
uint64_t deleted_edge_records = 0
 
uint64_t deleted_membership_records = 0
 
uint64_t deleted_bases = 0
 
uint64_t reversing_self_edge_records = 0
 
uint64_t deleted_reversing_self_edge_records = 0
 

Static Private Attributes

static const double defrag_factor = .2
 Defragment data structures when the orphaned records are this fraction of the whole. More...
 
static const size_t NARROW_PAGE_WIDTH = 256
 We use standard page widths for page-compressed vectors. More...
 
static const size_t WIDE_PAGE_WIDTH = 1024
 
static const size_t GRAPH_RECORD_SIZE = 2
 
static const size_t GRAPH_START_EDGES_OFFSET = 0
 
static const size_t GRAPH_END_EDGES_OFFSET = 1
 
static const size_t SEQ_START_RECORD_SIZE = 1
 
static const size_t SEQ_LENGTH_RECORD_SIZE = 1
 
static const size_t EDGE_RECORD_SIZE = 2
 
static const size_t EDGE_TRAV_OFFSET = 0
 
static const size_t EDGE_NEXT_OFFSET = 1
 
static const size_t NODE_MEMBER_RECORD_SIZE = 1
 
static const size_t MEMBERSHIP_ID_RECORD_SIZE = 1
 
static const size_t MEMBERSHIP_OFFSET_RECORD_SIZE = 1
 
static const size_t MEMBERSHIP_NEXT_RECORD_SIZE = 1
 
static const size_t PATH_RECORD_SIZE = 2
 
static const size_t PATH_PREV_OFFSET = 0
 
static const size_t PATH_NEXT_OFFSET = 1
 
static const size_t STEP_RECORD_SIZE = 1
 
static const double PATH_RESIZE_FACTOR = 1.25
 

Additional Inherited Members

- Protected Member Functions inherited from handlegraph::PathHandleGraph
virtual bool for_each_step_on_handle_impl (const handle_t &handle, const std::function< bool(const step_handle_t &)> &iteratee) const =0
 

Detailed Description

PackedGraph is a HandleGraph implementation designed to use very little memory. It stores its data in bit-packed integer vectors, which are dynamically widened as needed in O(1) amortized time. Within these vectors, graphs are stored using adjacency linked lists.

Since removals of elements can cause slots in the internal vectors to become unused, the graph will occasionally defragment itself after some modification operations, which involves copying its internal data structures.

This implementation is a good choice when working with very large graphs, where the final memory usage of the constructed graph must be minimized. It is not a good choice when large fractions of the graph will need to be deleted and replaced; ODGI or HashGraph may be better for such workloads.

Constructor & Destructor Documentation

◆ PackedGraph() [1/2]

bdsg::PackedGraph::PackedGraph ( )

◆ ~PackedGraph()

bdsg::PackedGraph::~PackedGraph ( )

◆ PackedGraph() [2/2]

bdsg::PackedGraph::PackedGraph ( istream &  in)

Construct from a stream.

Member Function Documentation

◆ append_path_name()

void bdsg::PackedGraph::append_path_name ( const string &  path_name)
private

Encode the path name into the internal representation and append it to the master list of path names.

◆ append_step()

step_handle_t bdsg::PackedGraph::append_step ( const path_handle_t path,
const handle_t to_append 
)
virtual

Append a visit to a node to the given path. Returns a handle to the new final step on the path which is appended. Handles to prior steps on the path, and to other paths, must remain valid.

Implements handlegraph::MutablePathHandleGraph.

◆ apply_ordering()

void bdsg::PackedGraph::apply_ordering ( const vector< handle_t > &  order,
bool  compact_ids = false 
)

Reorder the graph's internal structure to match that given. This sets the order that is used for iteration in functions like for_each_handle. Optionally compact the id space of the graph to match the ordering, from 1->|ordering|. This may be a no-op in the case of graph implementations that do not have any mechanism to maintain an ordering.

◆ apply_orientation()

handle_t bdsg::PackedGraph::apply_orientation ( const handle_t handle)
virtual

Alter the node that the given handle corresponds to so the orientation indicated by the handle becomes the node's local forward orientation. Rewrites all edges pointing to the node and the node's sequence to reflect this. Invalidates all handles to the node (including the one passed). Returns a new, valid handle to the node in its new forward orientation. Note that it is possible for the node's ID to change. Does not update any stored paths. May change the ordering of the underlying graph.

Implements handlegraph::MutableHandleGraph.

◆ clear()

void bdsg::PackedGraph::clear ( void  )
virtual

Remove all nodes and edges. Does not update any stored paths.

Implements handlegraph::DeletableHandleGraph.

◆ compact_ids()

void bdsg::PackedGraph::compact_ids ( const vector< handle_t > &  order)
private

Compact the node ID space to [1, num_nodes] according the indicated order. Every node must be present in the vector exactly one time to be valid.

◆ complement_encoded_nucleotide()

uint64_t bdsg::PackedGraph::complement_encoded_nucleotide ( const uint64_t &  val) const
inlineprivate

Complement nucleotide encoded as [0, 4].

◆ create_edge()

void bdsg::PackedGraph::create_edge ( const handle_t left,
const handle_t right 
)
virtual

Create an edge connecting the given handles in the given order and orientations. Ignores existing edges.

Implements handlegraph::MutableHandleGraph.

◆ create_handle() [1/2]

handle_t bdsg::PackedGraph::create_handle ( const std::string &  sequence)
virtual

Create a new node with the given sequence and return the handle. The sequence may not be empty.

Implements handlegraph::MutableHandleGraph.

◆ create_handle() [2/2]

handle_t bdsg::PackedGraph::create_handle ( const std::string &  sequence,
const nid_t id 
)
virtual

Create a new node with the given id and sequence, then return the handle. The sequence may not be empty. The ID must be strictly greater than 0.

Implements handlegraph::MutableHandleGraph.

◆ create_path_handle()

path_handle_t bdsg::PackedGraph::create_path_handle ( const string &  name,
bool  is_circular = false 
)

Create a path with the given name. The caller must ensure that no path with the given name exists already, or the behavior is undefined. Returns a handle to the created empty path. Handles to other paths must remain valid.

◆ decode_nucleotide()

char bdsg::PackedGraph::decode_nucleotide ( const uint64_t &  val) const
inlineprivate

Map [0, 4] to nucleotides.

◆ decode_path_name()

string bdsg::PackedGraph::decode_path_name ( const int64_t &  path_idx) const
private

Decode the internal representation of a path name and return it as a string.

◆ decode_traversal()

const handle_t & bdsg::PackedGraph::decode_traversal ( const uint64_t &  val) const
inlineprivate

◆ defragment()

void bdsg::PackedGraph::defragment ( bool  force = false)
private

Check if have orphaned enough records in the graph's various linked lists to warrant reallocating and defragmenting them. If so, do it. Optionally, defragment even if we have not deleted many things.

◆ defragment_path()

void bdsg::PackedGraph::defragment_path ( const int64_t &  path_idx,
bool  force = false 
)
private

Check if have orphaned enough records in the linked list of the path to warrant reallocating and defragmenting it. If so, do it. Optionally, defragment even if we have not deleted many things. WARNING: invalidates step_handle_t's to this path.

◆ deserialize_members()

void bdsg::PackedGraph::deserialize_members ( istream &  in)
privatevirtual

Read the graph from an in stream (called from the inherited 'deserialize' method)

Implements handlegraph::SerializableHandleGraph.

◆ destroy_edge()

void bdsg::PackedGraph::destroy_edge ( const handle_t left,
const handle_t right 
)
virtual

Remove the edge connecting the given handles in the given order and orientations. Ignores nonexistent edges. Does not update any stored paths.

Implements handlegraph::DeletableHandleGraph.

◆ destroy_handle()

void bdsg::PackedGraph::destroy_handle ( const handle_t handle)
virtual

Remove the node belonging to the given handle and all of its edges. Destroys any paths in which the node participates. Invalidates the destroyed handle. May be called during serial for_each_handle iteration ONLY on the node being iterated. May NOT be called during parallel for_each_handle iteration. May NOT be called on the node from which edges are being followed during follow_edges. May NOT be called during iteration over paths, if it would destroy a path. May NOT be called during iteration along a path, if it would destroy that path.

Implements handlegraph::DeletableHandleGraph.

◆ destroy_path()

void bdsg::PackedGraph::destroy_path ( const path_handle_t path)
virtual

Destroy the given path. Invalidates handles to the path and its node steps.

Implements handlegraph::MutablePathHandleGraph.

◆ divide_handle()

std::vector< handle_t > bdsg::PackedGraph::divide_handle ( const handle_t handle,
const std::vector< size_t > &  offsets 
)
virtual

Split a handle's underlying node at the given offsets in the handle's orientation. Returns all of the handles to the parts. Other handles to the node being split may be invalidated. The split pieces stay in the same local forward orientation as the original node, but the returned handles come in the order and orientation appropriate for the handle passed in. Updates stored paths.

Implements handlegraph::MutableHandleGraph.

◆ eject_deleted_paths()

void bdsg::PackedGraph::eject_deleted_paths ( )
private

If we've deleted any paths, remove them from the paths vector and reassign path IDs.

◆ encode_and_assign_path_name()

PackedVector bdsg::PackedGraph::encode_and_assign_path_name ( const string &  path_name)
private

Convert a path name into an integer vector, assigning new chars as necessary.

◆ encode_nucleotide()

uint64_t bdsg::PackedGraph::encode_nucleotide ( const char &  nt) const
inlineprivate

Convenience functions to translate between encodings in the vectors.

Map nucleotides into [0, 4]

◆ encode_path_name()

PackedVector bdsg::PackedGraph::encode_path_name ( const string &  path_name) const
private

Convert a path name into an integer vector using only existing char assignments If the path name contains previously unseen characters, returns an empty vector.

◆ encode_traversal()

const uint64_t & bdsg::PackedGraph::encode_traversal ( const handle_t handle) const
inlineprivate

◆ extract_encoded_path_name()

PackedVector bdsg::PackedGraph::extract_encoded_path_name ( const int64_t &  path_idx) const
private

Extract the internal representation of a path name, but do not decode it.

◆ flip()

handle_t bdsg::PackedGraph::flip ( const handle_t handle) const
virtual

Invert the orientation of a handle (potentially without getting its ID)

Implements handlegraph::HandleGraph.

◆ follow_edges_impl()

bool bdsg::PackedGraph::follow_edges_impl ( const handle_t handle,
bool  go_left,
const std::function< bool(const handle_t &)> &  iteratee 
) const
virtual

Loop over all the handles to next/previous (right/left) nodes. Passes them to a callback which returns false to stop iterating and true to continue. Returns true if we finished and false if we stopped early.

Implements handlegraph::HandleGraph.

◆ for_each_handle_impl()

bool bdsg::PackedGraph::for_each_handle_impl ( const std::function< bool(const handle_t &)> &  iteratee,
bool  parallel = false 
) const
virtual

Loop over all the nodes in the graph in their local forward orientations, in their internal stored order. Stop if the iteratee returns false. Can be told to run in parallel, in which case stopping after a false return value is on a best-effort basis and iteration order is not defined.

Implements handlegraph::HandleGraph.

◆ for_each_path_handle_impl()

bool bdsg::PackedGraph::for_each_path_handle_impl ( const std::function< bool(const path_handle_t &)> &  iteratee) const
virtual

Execute a function on each path in the graph.

Implements handlegraph::PathHandleGraph.

◆ for_each_step_on_handle_impl()

bool bdsg::PackedGraph::for_each_step_on_handle_impl ( const handle_t handle,
const function< bool(const step_handle_t &)> &  iteratee 
) const

Calls the given function for each step of the given handle on a path.

◆ get_assignment()

uint64_t bdsg::PackedGraph::get_assignment ( const char &  c) const
inlineprivate

Get the integer assignment of a char, or numeric_limits<uint64_t>::max() if no assignment has been made

◆ get_base()

char bdsg::PackedGraph::get_base ( const handle_t handle,
size_t  index 
) const
virtual

Returns one base of a handle's sequence, in the orientation of the handle.

Reimplemented from handlegraph::HandleGraph.

◆ get_char()

char bdsg::PackedGraph::get_char ( const uint64_t &  assignment) const
inlineprivate

Get the char assigned to an integer (must be already assigned)

◆ get_edge_count()

size_t bdsg::PackedGraph::get_edge_count ( ) const
virtual

Return the total number of edges in the graph. If not overridden, counts them all in linear time.

Reimplemented from handlegraph::HandleGraph.

◆ get_edge_target()

uint64_t bdsg::PackedGraph::get_edge_target ( const uint64_t &  edge_index) const
inlineprivate

◆ get_handle()

handle_t bdsg::PackedGraph::get_handle ( const nid_t node_id,
bool  is_reverse = false 
) const
virtual

Look up the handle for the node with the given ID in the given orientation.

Implements handlegraph::HandleGraph.

◆ get_handle_of_step()

handle_t bdsg::PackedGraph::get_handle_of_step ( const step_handle_t step_handle) const
virtual

Get a node handle (node ID and orientation) from a handle to an step on a path.

Implements handlegraph::PathHandleGraph.

◆ get_id()

nid_t bdsg::PackedGraph::get_id ( const handle_t handle) const
virtual

Get the ID from a handle.

Implements handlegraph::HandleGraph.

◆ get_is_circular()

bool bdsg::PackedGraph::get_is_circular ( const path_handle_t path_handle) const
virtual

Look up whether a path is circular.

Implements handlegraph::PathHandleGraph.

◆ get_is_reverse()

bool bdsg::PackedGraph::get_is_reverse ( const handle_t handle) const
virtual

Get the orientation of a handle.

Implements handlegraph::HandleGraph.

◆ get_length()

size_t bdsg::PackedGraph::get_length ( const handle_t handle) const
virtual

Get the length of a node.

Implements handlegraph::HandleGraph.

◆ get_magic_number()

uint32_t bdsg::PackedGraph::get_magic_number ( ) const
virtual

Returns a static high-entropy number to indicate the class.

Implements handlegraph::SerializableHandleGraph.

◆ get_membership_path()

uint64_t bdsg::PackedGraph::get_membership_path ( const uint64_t &  membership_index) const
inlineprivate

◆ get_membership_step()

uint64_t bdsg::PackedGraph::get_membership_step ( const uint64_t &  membership_index) const
inlineprivate

◆ get_next_edge_index()

uint64_t bdsg::PackedGraph::get_next_edge_index ( const uint64_t &  edge_index) const
inlineprivate

◆ get_next_membership()

uint64_t bdsg::PackedGraph::get_next_membership ( const uint64_t &  membership_index) const
inlineprivate

◆ get_next_step()

step_handle_t bdsg::PackedGraph::get_next_step ( const step_handle_t step_handle) const
virtual

Returns a handle to the next step on the path. If the given step is the final step of a non-circular path, returns the past-the-last step that is also returned by path_end. In a circular path, the "last" step will loop around to the "first" (i.e. the one returned by path_begin). Note: to iterate over each step one time, even in a circular path, consider for_each_step_in_path.

Implements handlegraph::PathHandleGraph.

◆ get_node_count()

size_t bdsg::PackedGraph::get_node_count ( void  ) const
virtual

Return the number of nodes in the graph.

Implements handlegraph::HandleGraph.

◆ get_or_make_assignment()

uint64_t bdsg::PackedGraph::get_or_make_assignment ( const char &  c)
inlineprivate

Get the integer assignment of a char, assigning a new one if necessary.

◆ get_path_count()

size_t bdsg::PackedGraph::get_path_count ( ) const
virtual

Returns the number of paths stored in the graph.

Implements handlegraph::PathHandleGraph.

◆ get_path_handle()

path_handle_t bdsg::PackedGraph::get_path_handle ( const std::string &  path_name) const
virtual

Look up the path handle for the given path name. The path with that name must exist.

Implements handlegraph::PathHandleGraph.

◆ get_path_handle_of_step()

path_handle_t bdsg::PackedGraph::get_path_handle_of_step ( const step_handle_t step_handle) const
virtual

Returns a handle to the path that an step is on.

Implements handlegraph::PathHandleGraph.

◆ get_path_name()

string bdsg::PackedGraph::get_path_name ( const path_handle_t path_handle) const
virtual

Look up the name of a path from a handle to it.

Implements handlegraph::PathHandleGraph.

◆ get_previous_step()

step_handle_t bdsg::PackedGraph::get_previous_step ( const step_handle_t step_handle) const
virtual

Returns a handle to the previous step on the path. If the given step is the first step of a non-circular path, this method has undefined behavior. In a circular path, it will loop around from the "first" step (i.e. the one returned by path_begin) to the "last" step. Note: to iterate over each step one time, even in a circular path, consider for_each_step_in_path.

Implements handlegraph::PathHandleGraph.

◆ get_sequence()

string bdsg::PackedGraph::get_sequence ( const handle_t handle) const
virtual

Get the sequence of a node, presented in the handle's local forward orientation.

Implements handlegraph::HandleGraph.

◆ get_step_count()

size_t bdsg::PackedGraph::get_step_count ( const path_handle_t path_handle) const
virtual

Returns the number of node steps in the path.

Implements handlegraph::PathHandleGraph.

◆ get_step_next()

uint64_t bdsg::PackedGraph::get_step_next ( const PackedPath path,
const uint64_t &  step_index 
) const
inlineprivate

◆ get_step_prev()

uint64_t bdsg::PackedGraph::get_step_prev ( const PackedPath path,
const uint64_t &  step_index 
) const
inlineprivate

◆ get_step_trav()

uint64_t bdsg::PackedGraph::get_step_trav ( const PackedPath path,
const uint64_t &  step_index 
) const
inlineprivate

◆ get_subsequence()

string bdsg::PackedGraph::get_subsequence ( const handle_t handle,
size_t  index,
size_t  size 
) const
virtual

Returns a substring of a handle's sequence, in the orientation of the handle. If the indicated substring would extend beyond the end of the handle's sequence, the return value is truncated to the sequence's end.

Reimplemented from handlegraph::HandleGraph.

◆ get_total_length()

size_t bdsg::PackedGraph::get_total_length ( ) const
virtual

Return the total length of all nodes in the graph, in bp. If not overridden, loops over all nodes in linear time.

Reimplemented from handlegraph::HandleGraph.

◆ graph_index_to_node_member_index()

uint64_t bdsg::PackedGraph::graph_index_to_node_member_index ( const size_t &  graph_index) const
inlineprivate

◆ graph_index_to_seq_len_index()

uint64_t bdsg::PackedGraph::graph_index_to_seq_len_index ( const size_t &  graph_index) const
inlineprivate

◆ graph_index_to_seq_start_index()

uint64_t bdsg::PackedGraph::graph_index_to_seq_start_index ( const size_t &  graph_index) const
inlineprivate

◆ graph_iv_index()

size_t bdsg::PackedGraph::graph_iv_index ( const handle_t handle) const
inlineprivate

◆ has_next_step()

bool bdsg::PackedGraph::has_next_step ( const step_handle_t step_handle) const
virtual

Returns true if the step is not the last step in a non-circular path.

Implements handlegraph::PathHandleGraph.

◆ has_node()

bool bdsg::PackedGraph::has_node ( nid_t  node_id) const
virtual

Method to check if a node exists by ID.

Implements handlegraph::HandleGraph.

◆ has_path()

bool bdsg::PackedGraph::has_path ( const std::string &  path_name) const
virtual

Determine if a path name exists and is legal to get a path handle for.

Implements handlegraph::PathHandleGraph.

◆ has_previous_step()

bool bdsg::PackedGraph::has_previous_step ( const step_handle_t step_handle) const
virtual

Returns true if the step is not the first step in a non-circular path.

Implements handlegraph::PathHandleGraph.

◆ increment_node_ids()

void bdsg::PackedGraph::increment_node_ids ( nid_t  increment)
virtual

Add the given value to all node IDs

Reimplemented from handlegraph::MutableHandleGraph.

◆ max_node_id()

nid_t bdsg::PackedGraph::max_node_id ( void  ) const
virtual

Return the largest ID in the graph, or some larger number if the largest ID is unavailable. Return value is unspecified if the graph is empty.

Implements handlegraph::HandleGraph.

◆ min_node_id()

nid_t bdsg::PackedGraph::min_node_id ( void  ) const
virtual

Return the smallest ID in the graph, or some smaller number if the smallest ID is unavailable. Return value is unspecified if the graph is empty.

Implements handlegraph::HandleGraph.

◆ new_node_record()

size_t bdsg::PackedGraph::new_node_record ( nid_t  node_id)
private

Initialize all of the data corresponding with a new node and return it's 1-based offset

◆ optimize()

void bdsg::PackedGraph::optimize ( bool  allow_id_reassignment = true)
virtual

Adjust the representation of the graph in memory to improve performance. Optionally, allow the node IDs to be reassigned to further improve performance. Note: Ideally, this method is called one time once there is expected to be few graph modifications in the future.

Implements handlegraph::MutableHandleGraph.

◆ path_back()

step_handle_t bdsg::PackedGraph::path_back ( const path_handle_t path_handle) const
virtual

Get a handle to the last step, which will be an arbitrary step in a circular path that we consider "last" based on our construction of the path. If the path is empty then the implementation must return the same value as path_front_end().

Implements handlegraph::PathHandleGraph.

◆ path_begin()

step_handle_t bdsg::PackedGraph::path_begin ( const path_handle_t path_handle) const
virtual

Get a handle to the first step, or in a circular path to an arbitrary step considered "first". If the path is empty, returns the past-the-last step returned by path_end.

Implements handlegraph::PathHandleGraph.

◆ path_end()

step_handle_t bdsg::PackedGraph::path_end ( const path_handle_t path_handle) const
virtual

Get a handle to a fictitious position past the end of a path. This position is return by get_next_step for the final step in a path in a non-circular path. Note that get_next_step will NEVER return this value for a circular path.

Implements handlegraph::PathHandleGraph.

◆ path_front_end()

step_handle_t bdsg::PackedGraph::path_front_end ( const path_handle_t path_handle) const
virtual

Get a handle to a fictitious position before the beginning of a path. This position is return by get_previous_step for the first step in a path in a non-circular path. Note: get_previous_step will NEVER return this value for a circular path.

Implements handlegraph::PathHandleGraph.

◆ prepend_step()

step_handle_t bdsg::PackedGraph::prepend_step ( const path_handle_t path,
const handle_t to_prepend 
)
virtual

Prepend a visit to a node to the given path. Returns a handle to the new first step on the path which is appended. If the path is cirular, the new step is placed between the steps considered "last" and "first" by the method path_begin. Handles to later steps on the path, and to other paths, must remain valid.

Implements handlegraph::MutablePathHandleGraph.

◆ reassign_node_ids()

void bdsg::PackedGraph::reassign_node_ids ( const std::function< nid_t(const nid_t &)> &  get_new_id)
virtual

Reassign all node IDs as specified by the old->new mapping function.

Implements handlegraph::MutableHandleGraph.

◆ remove_edge_reference()

void bdsg::PackedGraph::remove_edge_reference ( const handle_t on,
const handle_t to 
)
private

Find and edge on given handle, to a given handle, and remove it from the edge list.

◆ report_memory()

void bdsg::PackedGraph::report_memory ( ostream &  out,
bool  individual_paths = false 
) const

Debugging function, measures memory and prints a report to an ostream. Optionally reports memory usage for every path individually.

◆ rewrite_segment()

pair< step_handle_t, step_handle_t > bdsg::PackedGraph::rewrite_segment ( const step_handle_t segment_begin,
const step_handle_t segment_end,
const vector< handle_t > &  new_segment 
)

Delete a segment of a path and rewrite it as some other sequence of steps. Returns a pair of step_handle_t's that indicate the range of the new segment in the path. The segment to delete should be designated by the first (begin) and past-last (end) step handles. If the step that is returned by path_begin is deleted, path_begin will now return the first step from the new segment or, in the case that the new segment is empty, the step used as segment_end. Empty ranges consist of two copies of the same step handle. Empty ranges in empty paths consist of two copies of the end sentinel handle for the path. Rewriting an empty range inserts before the provided end handle.

◆ serialize_members()

void bdsg::PackedGraph::serialize_members ( ostream &  out) const
privatevirtual

Write the graph to an out stream (called from the inherited 'serialize' method)

Implements handlegraph::SerializableHandleGraph.

◆ set_circularity()

void bdsg::PackedGraph::set_circularity ( const path_handle_t path,
bool  circular 
)
virtual

Make a path circular or non-circular. If the path is becoming circular, the last step is joined to the first step. If the path is becoming linear, the step considered "last" is unjoined from the step considered "first" according to the method path_begin.

Implements handlegraph::MutablePathHandleGraph.

◆ set_edge_target()

void bdsg::PackedGraph::set_edge_target ( const uint64_t &  edge_index,
const handle_t handle 
)
inlineprivate

◆ set_id_increment()

void bdsg::PackedGraph::set_id_increment ( const nid_t min_id)
virtual

Set a minimum id to increment the id space by, used as a hint during construction. May have no effect on a backing implementation.

Implements handlegraph::MutableHandleGraph.

◆ set_membership_path()

void bdsg::PackedGraph::set_membership_path ( const uint64_t &  membership_index,
const uint64_t &  path 
)
inlineprivate

◆ set_membership_step()

void bdsg::PackedGraph::set_membership_step ( const uint64_t &  membership_index,
const uint64_t &  step 
)
inlineprivate

◆ set_next_membership()

void bdsg::PackedGraph::set_next_membership ( const uint64_t &  membership_index,
const uint64_t &  next 
)
inlineprivate

◆ set_step_next()

void bdsg::PackedGraph::set_step_next ( PackedPath path,
const uint64_t &  step_index,
const uint64_t &  next_index 
)
inlineprivate

◆ set_step_prev()

void bdsg::PackedGraph::set_step_prev ( PackedPath path,
const uint64_t &  step_index,
const uint64_t &  prev_index 
)
inlineprivate

◆ set_step_trav()

void bdsg::PackedGraph::set_step_trav ( PackedPath path,
const uint64_t &  step_index,
const uint64_t &  trav 
)
inlineprivate

◆ tighten()

void bdsg::PackedGraph::tighten ( void  )
private

Attempt to compress data into less memory, possibly using more memory temporarily (especially useful before serializing). Node handles remain valid, but path and step handles are invalidated.

Member Data Documentation

◆ char_assignment

hash_map<char, uint64_t> bdsg::PackedGraph::char_assignment
private

We will reassign char values from the path names to small integers.

◆ defrag_factor

const double bdsg::PackedGraph::defrag_factor = .2
staticprivate

Defragment data structures when the orphaned records are this fraction of the whole.

Define all of the static class variables.

◆ deleted_bases

uint64_t bdsg::PackedGraph::deleted_bases = 0
private

◆ deleted_edge_records

uint64_t bdsg::PackedGraph::deleted_edge_records = 0
private

◆ deleted_membership_records

uint64_t bdsg::PackedGraph::deleted_membership_records = 0
private

◆ deleted_node_records

uint64_t bdsg::PackedGraph::deleted_node_records = 0
private

◆ deleted_reversing_self_edge_records

uint64_t bdsg::PackedGraph::deleted_reversing_self_edge_records = 0
private

◆ edge_lists_iv

PagedVector bdsg::PackedGraph::edge_lists_iv
private

Encodes a series of edges lists of nodes. {ID|orientation (bit-packed), next edge index}

◆ EDGE_NEXT_OFFSET

const size_t bdsg::PackedGraph::EDGE_NEXT_OFFSET = 1
staticprivate

◆ EDGE_RECORD_SIZE

const size_t bdsg::PackedGraph::EDGE_RECORD_SIZE = 2
staticprivate

◆ EDGE_TRAV_OFFSET

const size_t bdsg::PackedGraph::EDGE_TRAV_OFFSET = 0
staticprivate

◆ GRAPH_END_EDGES_OFFSET

const size_t bdsg::PackedGraph::GRAPH_END_EDGES_OFFSET = 1
staticprivate

◆ graph_iv

PagedVector bdsg::PackedGraph::graph_iv
private

Encodes the topology of the graph. Consists of fixed width records that represent offsets in edge_lists_iv. {start edge list index, end edge list index}

◆ GRAPH_RECORD_SIZE

const size_t bdsg::PackedGraph::GRAPH_RECORD_SIZE = 2
staticprivate

◆ GRAPH_START_EDGES_OFFSET

const size_t bdsg::PackedGraph::GRAPH_START_EDGES_OFFSET = 0
staticprivate

◆ inverse_char_assignment

string bdsg::PackedGraph::inverse_char_assignment
private

The inverse mapping from integer to the char value.

◆ max_id

nid_t bdsg::PackedGraph::max_id = 0
private

The maximum ID in the graph.

◆ MEMBERSHIP_ID_RECORD_SIZE

const size_t bdsg::PackedGraph::MEMBERSHIP_ID_RECORD_SIZE = 1
staticprivate

◆ MEMBERSHIP_NEXT_RECORD_SIZE

const size_t bdsg::PackedGraph::MEMBERSHIP_NEXT_RECORD_SIZE = 1
staticprivate

◆ MEMBERSHIP_OFFSET_RECORD_SIZE

const size_t bdsg::PackedGraph::MEMBERSHIP_OFFSET_RECORD_SIZE = 1
staticprivate

◆ min_id

nid_t bdsg::PackedGraph::min_id = std::numeric_limits<nid_t>::max()
private

The minimum ID in the graph.

◆ NARROW_PAGE_WIDTH

const size_t bdsg::PackedGraph::NARROW_PAGE_WIDTH = 256
staticprivate

We use standard page widths for page-compressed vectors.

◆ nid_to_graph_iv

PackedDeque bdsg::PackedGraph::nid_to_graph_iv
private

Encodes the 1-based offset of an ID in graph_iv in units of GRAPH_RECORD_SIZE. If no node with that ID exists, contains a 0. The index of a given ID is computed by (ID - min ID).

◆ NODE_MEMBER_RECORD_SIZE

const size_t bdsg::PackedGraph::NODE_MEMBER_RECORD_SIZE = 1
staticprivate

◆ path_deleted_steps_iv

PackedVector bdsg::PackedGraph::path_deleted_steps_iv
private

The number of steps that have have deleted from the path at the same index.

◆ path_head_iv

PagedVector bdsg::PackedGraph::path_head_iv
private

The 1-based index of the head of the linked list in steps_iv of the path with the same index in paths

◆ path_id

string_hash_map<PackedVector, int64_t> bdsg::PackedGraph::path_id
private

Map from path names to index in the paths vector.

◆ path_is_circular_iv

PackedVector bdsg::PackedGraph::path_is_circular_iv
private

Bit-vector that marks whether the path at the same index is circular.

◆ path_is_deleted_iv

PackedVector bdsg::PackedGraph::path_is_deleted_iv
private

Bit-vector that marks whether the path at the same index has been deleted.

◆ path_membership_id_iv

PagedVector bdsg::PackedGraph::path_membership_id_iv
private

Encodes a series of linked lists of the memberships within paths. The nodes in the linked list are split over three separate vectors, with the entry at the same index in each vector corresponding to the same linked list node. Path ID (0-based index)

◆ path_membership_next_iv

PagedVector bdsg::PackedGraph::path_membership_next_iv
private

1-based offset of the next occurrence of this node on a path within this vector (or 0 if there is none)

◆ path_membership_node_iv

PagedVector bdsg::PackedGraph::path_membership_node_iv
private

Encodes the membership of a node in all paths. In the same order as graph_iv. Consists of 1-based offset to the corresponding heads of linked lists in path_membership_value_iv, which contains the actual pointers into the paths.

◆ path_membership_offset_iv

PagedVector bdsg::PackedGraph::path_membership_offset_iv
private

1-based offset of the occurrence of the node in the corresponding PackedPath vector.

◆ path_name_length_iv

PackedVector bdsg::PackedGraph::path_name_length_iv
private

The length of the path's name for the path with the same index in paths.

◆ path_name_start_iv

PagedVector bdsg::PackedGraph::path_name_start_iv
private

The starting index of the path's name in path_names_iv for the path with the same index in paths

◆ path_names_iv

PackedVector bdsg::PackedGraph::path_names_iv
private

All path names, encoded according to the char assignments and concatenated in a single vector

◆ PATH_NEXT_OFFSET

const size_t bdsg::PackedGraph::PATH_NEXT_OFFSET = 1
staticprivate

◆ PATH_PREV_OFFSET

const size_t bdsg::PackedGraph::PATH_PREV_OFFSET = 0
staticprivate

◆ PATH_RECORD_SIZE

const size_t bdsg::PackedGraph::PATH_RECORD_SIZE = 2
staticprivate

◆ PATH_RESIZE_FACTOR

const double bdsg::PackedGraph::PATH_RESIZE_FACTOR = 1.25
staticprivate

◆ path_tail_iv

PagedVector bdsg::PackedGraph::path_tail_iv
private

The 1-based index of the tail of the linked list in steps_iv of the path with the same index in paths

◆ paths

vector<PackedPath> bdsg::PackedGraph::paths
private

Vector of the embedded paths in the graph.

◆ reversing_self_edge_records

uint64_t bdsg::PackedGraph::reversing_self_edge_records = 0
private

◆ seq_iv

PackedVector bdsg::PackedGraph::seq_iv
private

Encodes all of the sequences of all nodes in the graph.

◆ seq_length_iv

PackedVector bdsg::PackedGraph::seq_length_iv
private

Encodes the length of a node's sequence in seq_iv. Matches the order of graph_iv.

◆ SEQ_LENGTH_RECORD_SIZE

const size_t bdsg::PackedGraph::SEQ_LENGTH_RECORD_SIZE = 1
staticprivate

◆ seq_start_iv

PagedVector bdsg::PackedGraph::seq_start_iv
private

Encodes the start of a node's sequence in seq_iv. Matches the order of graph_iv.

◆ SEQ_START_RECORD_SIZE

const size_t bdsg::PackedGraph::SEQ_START_RECORD_SIZE = 1
staticprivate

◆ STEP_RECORD_SIZE

const size_t bdsg::PackedGraph::STEP_RECORD_SIZE = 1
staticprivate

◆ WIDE_PAGE_WIDTH

const size_t bdsg::PackedGraph::WIDE_PAGE_WIDTH = 1024
staticprivate

The documentation for this class was generated from the following files: