vg
tools for working with variation graphs
Classes | Public Member Functions | Static Public Member Functions | Protected Member Functions | Protected Attributes | Friends | List of all members
vg::MinimumDistanceIndex Class Reference

#include <min_distance.hpp>

Classes

class  ChainIndex
 
class  SnarlIndex
 

Public Member Functions

 MinimumDistanceIndex (const HandleGraph *graph, const SnarlManager *snarl_manager, int64_t cap=0)
 
 MinimumDistanceIndex (istream &in)
 
 MinimumDistanceIndex ()
 
void serialize (ostream &out) const
 
void load (istream &in)
 
int64_t node_length (id_t id) const
 
int64_t min_distance (pos_t pos1, pos_t pos2) const
 
int64_t max_distance (pos_t pos1, pos_t pos2) const
 
tuple< id_t, bool, bool > into_which_snarl (id_t node_id, bool reverse) const
 Get the start node (id and orientation pointing into the snarl) of the. More...
 
void subgraph_in_range (const Path &path, const HandleGraph *super_graph, int64_t min_distance, int64_t max_distance, std::unordered_set< id_t > &sub_graph, bool look_forward)
 
tuple< bool, size_t, size_t, bool, size_t, size_t, size_t, size_t, bool > get_minimizer_distances (pos_t pos)
 
int64_t top_level_chain_length (id_t node_id)
 
size_t get_connected_component (id_t node_id)
 
void write_snarls_to_json ()
 Write snarls out to stout. More...
 
void print_self () const
 print the distance index for debugging More...
 
void print_snarl_stats ()
 

Static Public Member Functions

static int64_t min_pos (vector< int64_t > vals)
 Helper function to find the minimum value that is not -1. More...
 
static int64_t min_pos (int64_t x, int64_t y)
 Helper function to find the minimum value that is not -1. More...
 

Protected Member Functions

int64_t calculate_min_index (const HandleGraph *graph, const SnarlManager *snarl_manager, const Chain *chain, size_t parent_id, bool rev_in_parent, bool trivial_chain, size_t depth, size_t component_num)
 
void populate_snarl_index (const HandleGraph *graph, const SnarlManager *snarl_manager, const NetGraph &ng, const Snarl *snarl, bool snarl_rev_in_chain, size_t snarl_assignment, hash_set< pair< id_t, bool >> &all_nodes, size_t depth, size_t component_num)
 
void calculate_max_index (const HandleGraph *graph, int64_t cap)
 
void add_nodes_in_range (const HandleGraph *super_graph, int64_t min_distance, int64_t max_distance, std::unordered_set< id_t > &sub_graph, vector< tuple< handle_t, int64_t >> &start_nodes, hash_set< pair< id_t, bool >> &seen_nodes)
 
tuple< int64_t, int64_t, pair< id_t, bool > > dist_to_common_ancestor (pair< size_t, bool > common_ancestor, pos_t &pos, bool rev) const
 
size_t get_primary_assignment (id_t i) const
 
size_t get_primary_rank (id_t i) const
 
size_t get_chain_assignment (id_t i) const
 
size_t get_chain_rank (id_t i) const
 
size_t get_secondary_assignment (id_t i) const
 
size_t get_secondary_rank (id_t i) const
 

Protected Attributes

vector< SnarlIndexsnarl_indexes
 vector of all SnarlIndex objects More...
 
vector< ChainIndexchain_indexes
 vector of all ChainIndex objects More...
 
sdsl::int_vector node_to_component
 
sdsl::int_vector component_to_chain_length
 
sdsl::int_vector component_to_chain_index
 
sdsl::int_vector primary_snarl_assignments
 
sdsl::int_vector primary_snarl_ranks
 
sdsl::int_vector secondary_snarl_assignments
 
sdsl::int_vector secondary_snarl_ranks
 Stores the ranks of nodes in secondary snarls. More...
 
sdsl::bit_vector has_secondary_snarl_bv
 
sdsl::rank_support_v< 1 > has_secondary_snarl
 
sdsl::int_vector chain_assignments
 
sdsl::int_vector chain_ranks
 
sdsl::bit_vector has_chain_bv
 
sdsl::rank_support_v< 1 > has_chain
 
id_t min_node_id
 
id_t max_node_id
 
size_t tree_depth
 The total depth of the snarl tree, starting from 0. More...
 
bool include_maximum
 True if we are including the maximum distance index. More...
 
sdsl::int_vector min_distances
 
sdsl::int_vector max_distances
 
string file_header = "distance index version 2.2"
 
bool include_component
 

Friends

class SnarlIndex
 
class ChainIndex
 
class SnarlSeedClusterer
 

Detailed Description

The distance index. Stores minimum distances among nodes in each netgraph and chain. Used for calculation of the minimum distance between two positions and for a maximum distance estimation. The maximum distance estimation is at least as large as the maximum distance between two positions up to a specified cap

Constructor & Destructor Documentation

◆ MinimumDistanceIndex() [1/3]

vg::MinimumDistanceIndex::MinimumDistanceIndex ( const HandleGraph graph,
const SnarlManager snarl_manager,
int64_t  cap = 0 
)

Constructor for the distance index. Cap is the distance up to which the maximum distance will give a reliable bound - if there is a path with length greater than cap, then the maximum distance will be at least cap If the cap is set to 0 (default), then the maximum distance index is not included

◆ MinimumDistanceIndex() [2/3]

vg::MinimumDistanceIndex::MinimumDistanceIndex ( istream &  in)

◆ MinimumDistanceIndex() [3/3]

vg::MinimumDistanceIndex::MinimumDistanceIndex ( )

Member Function Documentation

◆ add_nodes_in_range()

void vg::MinimumDistanceIndex::add_nodes_in_range ( const HandleGraph super_graph,
int64_t  min_distance,
int64_t  max_distance,
std::unordered_set< id_t > &  sub_graph,
vector< tuple< handle_t, int64_t >> &  start_nodes,
hash_set< pair< id_t, bool >> &  seen_nodes 
)
protected

Helper for subgraphInRange Given starting handles in the super graph and the distances to each handle (including the start position and

◆ calculate_max_index()

void vg::MinimumDistanceIndex::calculate_max_index ( const HandleGraph graph,
int64_t  cap 
)
protected

Compute min_distances and max_distances, which store distances needed for maximum distance calculation Only used if include_maximum is true

◆ calculate_min_index()

int64_t vg::MinimumDistanceIndex::calculate_min_index ( const HandleGraph graph,
const SnarlManager snarl_manager,
const Chain chain,
size_t  parent_id,
bool  rev_in_parent,
bool  trivial_chain,
size_t  depth,
size_t  component_num 
)
protected

Helper function for constructor - populate the minimum distance index Given the top level snarls

◆ dist_to_common_ancestor()

tuple< int64_t, int64_t, pair< id_t, bool > > vg::MinimumDistanceIndex::dist_to_common_ancestor ( pair< size_t, bool >  common_ancestor,
pos_t pos,
bool  rev 
) const
protected

Helper function for distance calculation Returns the distance to the start of and end of a node/snarl in The node in the common ancestor is a pair of <node id, rev> commonAncestor is a pair of the node_id and true if it is a chain rev is false if the pos is the start pos ( if it must be traversed forward) and false if it is the end pos (if it must be reached in the direction of pos)

◆ get_chain_assignment()

size_t vg::MinimumDistanceIndex::get_chain_assignment ( id_t  i) const
inlineprotected

◆ get_chain_rank()

size_t vg::MinimumDistanceIndex::get_chain_rank ( id_t  i) const
inlineprotected

◆ get_connected_component()

size_t vg::MinimumDistanceIndex::get_connected_component ( id_t  node_id)

◆ get_minimizer_distances()

tuple< bool, size_t, size_t, bool, size_t, size_t, size_t, size_t, bool > vg::MinimumDistanceIndex::get_minimizer_distances ( pos_t  pos)

◆ get_primary_assignment()

size_t vg::MinimumDistanceIndex::get_primary_assignment ( id_t  i) const
inlineprotected

Get the index into chain_indexes/rank in chain of node i. Detects and throws an error if node i never got assigned to a snarl.

◆ get_primary_rank()

size_t vg::MinimumDistanceIndex::get_primary_rank ( id_t  i) const
inlineprotected

◆ get_secondary_assignment()

size_t vg::MinimumDistanceIndex::get_secondary_assignment ( id_t  i) const
inlineprotected

◆ get_secondary_rank()

size_t vg::MinimumDistanceIndex::get_secondary_rank ( id_t  i) const
inlineprotected

◆ into_which_snarl()

tuple< id_t, bool, bool > vg::MinimumDistanceIndex::into_which_snarl ( id_t  node_id,
bool  reverse 
) const

Get the start node (id and orientation pointing into the snarl) of the.

◆ load()

void vg::MinimumDistanceIndex::load ( istream &  in)

◆ max_distance()

int64_t vg::MinimumDistanceIndex::max_distance ( pos_t  pos1,
pos_t  pos2 
) const

Get a maximum distance bound between the positions, ignoring direction Returns a positive value even if the two nodes are unreachable

◆ min_distance()

int64_t vg::MinimumDistanceIndex::min_distance ( pos_t  pos1,
pos_t  pos2 
) const

Get the minimum distance between two positions Distance includes only one of the positions. The distance from a position to itself would be 1 If there is no path between the two positions then the distance is -1

◆ min_pos() [1/2]

static int64_t vg::MinimumDistanceIndex::min_pos ( int64_t  x,
int64_t  y 
)
inlinestatic

Helper function to find the minimum value that is not -1.

◆ min_pos() [2/2]

int64_t vg::MinimumDistanceIndex::min_pos ( vector< int64_t >  vals)
static

Helper function to find the minimum value that is not -1.

◆ node_length()

int64_t vg::MinimumDistanceIndex::node_length ( id_t  id) const

◆ populate_snarl_index()

void vg::MinimumDistanceIndex::populate_snarl_index ( const HandleGraph graph,
const SnarlManager snarl_manager,
const NetGraph ng,
const Snarl snarl,
bool  snarl_rev_in_chain,
size_t  snarl_assignment,
hash_set< pair< id_t, bool >> &  all_nodes,
size_t  depth,
size_t  component_num 
)
protected

◆ print_self()

void vg::MinimumDistanceIndex::print_self ( ) const

print the distance index for debugging

◆ print_snarl_stats()

void vg::MinimumDistanceIndex::print_snarl_stats ( )

◆ serialize()

void vg::MinimumDistanceIndex::serialize ( ostream &  out) const

◆ subgraph_in_range()

void vg::MinimumDistanceIndex::subgraph_in_range ( const Path path,
const HandleGraph super_graph,
int64_t  min_distance,
int64_t  max_distance,
std::unordered_set< id_t > &  sub_graph,
bool  look_forward 
)

◆ top_level_chain_length()

int64_t vg::MinimumDistanceIndex::top_level_chain_length ( id_t  node_id)

◆ write_snarls_to_json()

void vg::MinimumDistanceIndex::write_snarls_to_json ( )

Write snarls out to stout.

Friends And Related Function Documentation

◆ ChainIndex

friend class ChainIndex
friend

◆ SnarlIndex

friend class SnarlIndex
friend

◆ SnarlSeedClusterer

friend class SnarlSeedClusterer
friend

Member Data Documentation

◆ chain_assignments

sdsl::int_vector vg::MinimumDistanceIndex::chain_assignments
protected

For each node, store the index and rank for the chain that the node belongs to, if any

◆ chain_indexes

vector< ChainIndex> vg::MinimumDistanceIndex::chain_indexes
protected

vector of all ChainIndex objects

◆ chain_ranks

sdsl::int_vector vg::MinimumDistanceIndex::chain_ranks
protected

◆ component_to_chain_index

sdsl::int_vector vg::MinimumDistanceIndex::component_to_chain_index
protected

◆ component_to_chain_length

sdsl::int_vector vg::MinimumDistanceIndex::component_to_chain_length
protected

◆ file_header

string vg::MinimumDistanceIndex::file_header = "distance index version 2.2"
protected

◆ has_chain

sdsl::rank_support_v<1> vg::MinimumDistanceIndex::has_chain
protected

◆ has_chain_bv

sdsl::bit_vector vg::MinimumDistanceIndex::has_chain_bv
protected

◆ has_secondary_snarl

sdsl::rank_support_v<1> vg::MinimumDistanceIndex::has_secondary_snarl
protected

◆ has_secondary_snarl_bv

sdsl::bit_vector vg::MinimumDistanceIndex::has_secondary_snarl_bv
protected

For each node, stores 1 if the node is in a secondary snarl and 0 otherwise. Use rank to find which index into secondary_snarls a node's secondary snarl is at

◆ include_component

bool vg::MinimumDistanceIndex::include_component
protected

◆ include_maximum

bool vg::MinimumDistanceIndex::include_maximum
protected

True if we are including the maximum distance index.

◆ max_distances

sdsl::int_vector vg::MinimumDistanceIndex::max_distances
protected

◆ max_node_id

id_t vg::MinimumDistanceIndex::max_node_id
protected

◆ min_distances

sdsl::int_vector vg::MinimumDistanceIndex::min_distances
protected

For each node in the graph, store the minimum and maximum distances from a tip to the node

◆ min_node_id

id_t vg::MinimumDistanceIndex::min_node_id
protected

◆ node_to_component

sdsl::int_vector vg::MinimumDistanceIndex::node_to_component
protected

◆ primary_snarl_assignments

sdsl::int_vector vg::MinimumDistanceIndex::primary_snarl_assignments
protected

Vector of length max node id - min node id For each node, stores the index into snarlIndexes for the primary snarl containing the node A primary snarl is the snarl that contains this node as an actual node, as opposed to a node representing a snarl or chain

◆ primary_snarl_ranks

sdsl::int_vector vg::MinimumDistanceIndex::primary_snarl_ranks
protected

For each node, stores the rank of the node in the snarlIndex indicated by primary_snarl_assignments Rank refers to the index into the SnarlIndex's distance matrix that represents a particular node The rank stored is always for the fd direction, rev direction is the index + 1. The first and last rank will always be the inward facing start and end nodes If the start node is traversed backwards to enter the snarl, then the rank 0 will represent the start node in reverse. The rank stored in this vector will be 1, representing the start node forward

◆ secondary_snarl_assignments

sdsl::int_vector vg::MinimumDistanceIndex::secondary_snarl_assignments
protected

Similar to primary snarls, stores snarl index of secondary snarl each node belongs to, if any. Secondary snarl can be a node that represents a snarl/chain in the netgraph of the parent snarl or a node that participates in multiple snarls in a chain. The primary snarl will always be the snarl that occurs first in the chain

◆ secondary_snarl_ranks

sdsl::int_vector vg::MinimumDistanceIndex::secondary_snarl_ranks
protected

Stores the ranks of nodes in secondary snarls.

◆ snarl_indexes

vector<SnarlIndex> vg::MinimumDistanceIndex::snarl_indexes
protected

vector of all SnarlIndex objects

◆ tree_depth

size_t vg::MinimumDistanceIndex::tree_depth
protected

The total depth of the snarl tree, starting from 0.


The documentation for this class was generated from the following files: