vg
tools for working with variation graphs
Public Member Functions | Public Attributes | Private Member Functions | Private Attributes | List of all members
vg::Transcriptome Class Reference

#include <transcriptome.hpp>

Public Member Functions

 Transcriptome (const string &, const bool)
 
int32_t add_intron_splice_junctions (istream &intron_stream, unique_ptr< gbwt::GBWT > &haplotype_index)
 
int32_t add_transcript_splice_junctions (istream &transcript_stream, unique_ptr< gbwt::GBWT > &haplotype_index)
 
int32_t add_transcripts (istream &transcript_stream, const gbwt::GBWT &haplotype_index)
 
const vector< CompletedTranscriptPath > & transcript_paths () const
 Returns transcript paths. More...
 
int32_t size () const
 Returns number of transcript paths. More...
 
const MutablePathDeletableHandleGraph & splice_graph () const
 Returns spliced variation graph. More...
 
bool splice_graph_node_updated () const
 
void remove_non_transcribed (const bool new_reference_paths)
 
void compact_ordered ()
 Topological sort and compact graph. More...
 
int32_t embed_transcript_paths (const bool add_reference_paths, const bool add_non_reference_paths)
 
int32_t add_transcripts_to_gbwt (gbwt::GBWTBuilder *gbwt_builder, const bool output_reference_transcripts, const bool add_bidirectional) const
 
int32_t write_sequences (ostream *fasta_ostream, const bool output_reference_transcripts) const
 
int32_t write_info (ostream *tsv_ostream, const gbwt::GBWT &haplotype_index, const bool output_reference_transcripts) const
 
void write_splice_graph (ostream *graph_ostream) const
 Writes spliced variation graph to vg file. More...
 

Public Attributes

int32_t num_threads = 1
 Number of threads used for transcript path construction. More...
 
string feature_type
 Feature type to parse in the gtf/gff file. Parse all types if empty. More...
 
string transcript_tag
 Attribute tag used to parse the transcript id/name in the gtf/gff file. More...
 
bool use_all_paths = false
 Use all paths embedded in the graph for transcript path construction. More...
 
bool use_reference_paths = false
 Use reference paths embedded in the graph for transcript path construction. More...
 
bool collapse_transcript_paths = true
 Collapse identical transcript paths. More...
 

Private Member Functions

vector< Transcript > parse_introns (istream &intron_stream, const bdsg::PositionOverlay &graph_path_pos_overlay) const
 Parse BED file of introns. More...
 
vector< Transcript > parse_transcripts (istream &transcript_stream, const bdsg::PositionOverlay &graph_path_pos_overlay) const
 Parse gtf/gff3 file of transcripts. More...
 
float mean_node_length () const
 Returns the mean node length of the graph. More...
 
void add_exon (Transcript *transcript, const pair< int32_t, int32_t > &exon_pos, const bdsg::PositionOverlay &graph_path_pos_overlay) const
 
void reorder_exons (Transcript *transcript) const
 
list< EditedTranscriptPath > construct_edited_transcript_paths (const vector< Transcript > &transcripts, const bdsg::PositionOverlay &graph_path_pos_overlay) const
 
void construct_edited_transcript_paths_callback (list< EditedTranscriptPath > *edited_transcript_paths, mutex *edited_transcript_paths_mutex, const bdsg::PositionOverlay &graph_path_pos_overlay, const int32_t thread_idx, const vector< Transcript > &transcripts) const
 Threaded edited transcript path construction. More...
 
void project_and_add_transcripts (const vector< Transcript > &transcripts, const gbwt::GBWT &haplotype_index, const bdsg::PositionOverlay &graph_path_pos_overlay, const float mean_node_length)
 
void project_and_add_transcripts_callback (const int32_t thread_idx, const vector< Transcript > &transcripts, const gbwt::GBWT &haplotype_index, const bdsg::PositionOverlay &graph_path_pos_overlay, const float mean_node_length)
 Threaded transcript projecting. More...
 
list< EditedTranscriptPath > project_transcript_gbwt (const Transcript &cur_transcript, const gbwt::GBWT &haplotype_index, const float mean_node_length) const
 Projects transcripts onto haplotypes in a GBWT index and returns resulting transcript paths. More...
 
vector< pair< exon_nodes_t, thread_ids_t > > get_exon_haplotypes (const vg::id_t start_node, const vg::id_t end_node, const gbwt::GBWT &haplotype_index, const int32_t expected_length) const
 
list< EditedTranscriptPath > project_transcript_embedded (const Transcript &cur_transcript, const bdsg::PositionOverlay &graph_path_pos_overlay, const bool reference_only) const
 Projects transcripts onto embedded paths in a variation graph and returns resulting transcript paths. More...
 
void append_transcript_paths (list< CompletedTranscriptPath > *completed_transcript_path, list< CompletedTranscriptPath > *new_completed_transcript_paths, const bool add_unqiue_paths_only) const
 Adds new transcript paths to current set. Has argument to only add unique paths. More...
 
list< CompletedTranscriptPath > construct_completed_transcript_paths (const list< EditedTranscriptPath > &edited_transcript_paths) const
 
vector< handle_t > path_to_handles (const Path &path) const
 
bool has_novel_exon_boundaries (const list< EditedTranscriptPath > &edited_transcript_paths, const bool include_transcript_ends) const
 
void augment_splice_graph (list< EditedTranscriptPath > *edited_transcript_paths, unique_ptr< gbwt::GBWT > &haplotype_index, const bool break_at_transcript_ends)
 
void update_haplotype_index (unique_ptr< gbwt::GBWT > &haplotype_index, const vector< Translation > &translation) const
 Update threads in gbwt index using graph translations. More...
 
void add_splice_junction_edges (const list< EditedTranscriptPath > &edited_transcript_paths)
 Adds transcript path splice-junction edges to splice graph. More...
 
void add_splice_junction_edges (const vector< CompletedTranscriptPath > &completed_transcript_paths)
 

Private Attributes

vector< CompletedTranscriptPath > _transcript_paths
 Transcriptome represented by a set of transcript paths. More...
 
mutex mutex_transcript_paths
 
unique_ptr< MutablePathDeletableHandleGraph > _splice_graph
 Spliced variation graph. More...
 
mutex mutex_splice_graph
 
bool _splice_graph_node_updated
 

Detailed Description

Class that defines a transcriptome represented by a set of transcript paths.

Constructor & Destructor Documentation

◆ Transcriptome()

vg::Transcriptome::Transcriptome ( const string &  graph_filename,
const bool  show_progress 
)

Member Function Documentation

◆ add_exon()

void vg::Transcriptome::add_exon ( Transcript *  transcript,
const pair< int32_t, int32_t > &  exon_pos,
const bdsg::PositionOverlay &  graph_path_pos_overlay 
) const
private

Finds the position of each end of an exon on a path in the
variation graph and adds the exon to a transcript.

◆ add_intron_splice_junctions()

int32_t vg::Transcriptome::add_intron_splice_junctions ( istream &  intron_stream,
unique_ptr< gbwt::GBWT > &  haplotype_index 
)

Add splice-junctions from an intron BED file. Returns number of parsed introns.

◆ add_splice_junction_edges() [1/2]

void vg::Transcriptome::add_splice_junction_edges ( const list< EditedTranscriptPath > &  edited_transcript_paths)
private

Adds transcript path splice-junction edges to splice graph.

◆ add_splice_junction_edges() [2/2]

void vg::Transcriptome::add_splice_junction_edges ( const vector< CompletedTranscriptPath > &  completed_transcript_paths)
private

◆ add_transcript_splice_junctions()

int32_t vg::Transcriptome::add_transcript_splice_junctions ( istream &  transcript_stream,
unique_ptr< gbwt::GBWT > &  haplotype_index 
)

Add splice-junctions from a transcript gtf/gff3 file. Returns number of parsed transcripts.

◆ add_transcripts()

int32_t vg::Transcriptome::add_transcripts ( istream &  transcript_stream,
const gbwt::GBWT &  haplotype_index 
)

Constructs transcript paths by projecting transcripts from a gtf/gff file onto embedded paths in a variation graph and/or haplotypes in a GBWT index. Augments graph with transcriptome splice-junctions. Returns number of transcript paths added.

◆ add_transcripts_to_gbwt()

int32_t vg::Transcriptome::add_transcripts_to_gbwt ( gbwt::GBWTBuilder *  gbwt_builder,
const bool  output_reference_transcripts,
const bool  add_bidirectional 
) const

Add transcript paths as threads in GBWT index. Returns number of added threads.

◆ append_transcript_paths()

void vg::Transcriptome::append_transcript_paths ( list< CompletedTranscriptPath > *  completed_transcript_path,
list< CompletedTranscriptPath > *  new_completed_transcript_paths,
const bool  add_unqiue_paths_only 
) const
private

Adds new transcript paths to current set. Has argument to only add unique paths.

◆ augment_splice_graph()

void vg::Transcriptome::augment_splice_graph ( list< EditedTranscriptPath > *  edited_transcript_paths,
unique_ptr< gbwt::GBWT > &  haplotype_index,
const bool  break_at_transcript_ends 
)
private

Augments the variation graph with transcript path exon boundaries and splice-junctions. Updates threads in gbwt index to match the augmented graph.

◆ compact_ordered()

void vg::Transcriptome::compact_ordered ( )

Topological sort and compact graph.

◆ construct_completed_transcript_paths()

list< CompletedTranscriptPath > vg::Transcriptome::construct_completed_transcript_paths ( const list< EditedTranscriptPath > &  edited_transcript_paths) const
private

Constructs completed transcript paths from edited transcript paths. Checks that the paths contain no edits compared to the graph.

◆ construct_edited_transcript_paths()

list< EditedTranscriptPath > vg::Transcriptome::construct_edited_transcript_paths ( const vector< Transcript > &  transcripts,
const bdsg::PositionOverlay &  graph_path_pos_overlay 
) const
private

Constructs edited transcript paths from a set of reference transcripts.

◆ construct_edited_transcript_paths_callback()

void vg::Transcriptome::construct_edited_transcript_paths_callback ( list< EditedTranscriptPath > *  edited_transcript_paths,
mutex *  edited_transcript_paths_mutex,
const bdsg::PositionOverlay &  graph_path_pos_overlay,
const int32_t  thread_idx,
const vector< Transcript > &  transcripts 
) const
private

Threaded edited transcript path construction.

◆ embed_transcript_paths()

int32_t vg::Transcriptome::embed_transcript_paths ( const bool  add_reference_paths,
const bool  add_non_reference_paths 
)

Embeds transcript paths in spliced variation graph.
Returns number of paths embedded.

◆ get_exon_haplotypes()

vector< pair< exon_nodes_t, thread_ids_t > > vg::Transcriptome::get_exon_haplotypes ( const vg::id_t  start_node,
const vg::id_t  end_node,
const gbwt::GBWT &  haplotype_index,
const int32_t  expected_length 
) const
private

Extracts all unique haplotype paths between two nodes from a GBWT index and returns the resulting paths and the corresponding haplotype ids for each path.

◆ has_novel_exon_boundaries()

bool vg::Transcriptome::has_novel_exon_boundaries ( const list< EditedTranscriptPath > &  edited_transcript_paths,
const bool  include_transcript_ends 
) const
private

Checks whether transcript path only consists of whole nodes (complete).

◆ mean_node_length()

float vg::Transcriptome::mean_node_length ( ) const
private

Returns the mean node length of the graph.

◆ parse_introns()

vector< Transcript > vg::Transcriptome::parse_introns ( istream &  intron_stream,
const bdsg::PositionOverlay &  graph_path_pos_overlay 
) const
private

Parse BED file of introns.

◆ parse_transcripts()

vector< Transcript > vg::Transcriptome::parse_transcripts ( istream &  transcript_stream,
const bdsg::PositionOverlay &  graph_path_pos_overlay 
) const
private

Parse gtf/gff3 file of transcripts.

◆ path_to_handles()

vector< handle_t > vg::Transcriptome::path_to_handles ( const Path &  path) const
private

Convert a path to a vector of handles. Checks that the path is complete (i.e. only consists of whole nodes).

◆ project_and_add_transcripts()

void vg::Transcriptome::project_and_add_transcripts ( const vector< Transcript > &  transcripts,
const gbwt::GBWT &  haplotype_index,
const bdsg::PositionOverlay &  graph_path_pos_overlay,
const float  mean_node_length 
)
private

Constructs transcript paths by projecting transcripts onto embedded paths in a variation graph and/or haplotypes in a GBWT index.

◆ project_and_add_transcripts_callback()

void vg::Transcriptome::project_and_add_transcripts_callback ( const int32_t  thread_idx,
const vector< Transcript > &  transcripts,
const gbwt::GBWT &  haplotype_index,
const bdsg::PositionOverlay &  graph_path_pos_overlay,
const float  mean_node_length 
)
private

Threaded transcript projecting.

◆ project_transcript_embedded()

list< EditedTranscriptPath > vg::Transcriptome::project_transcript_embedded ( const Transcript &  cur_transcript,
const bdsg::PositionOverlay &  graph_path_pos_overlay,
const bool  reference_only 
) const
private

Projects transcripts onto embedded paths in a variation graph and returns resulting transcript paths.

◆ project_transcript_gbwt()

list< EditedTranscriptPath > vg::Transcriptome::project_transcript_gbwt ( const Transcript &  cur_transcript,
const gbwt::GBWT &  haplotype_index,
const float  mean_node_length 
) const
private

Projects transcripts onto haplotypes in a GBWT index and returns resulting transcript paths.

◆ remove_non_transcribed()

void vg::Transcriptome::remove_non_transcribed ( const bool  new_reference_paths)

Removes non-transcribed (not in transcript paths) nodes. Optionally creates new reference paths that only include transcribed nodes and edges.

◆ reorder_exons()

void vg::Transcriptome::reorder_exons ( Transcript *  transcript) const
private

Reverses exon order if the transcript is on the reverse strand and the exons are ordered in reverse.

◆ size()

int32_t vg::Transcriptome::size ( ) const

Returns number of transcript paths.

◆ splice_graph()

const MutablePathDeletableHandleGraph & vg::Transcriptome::splice_graph ( ) const

Returns spliced variation graph.

◆ splice_graph_node_updated()

bool vg::Transcriptome::splice_graph_node_updated ( ) const

Returns true if nodes in the spliced variation graph have been updated (e.g. split) since parsed.

◆ transcript_paths()

const vector< CompletedTranscriptPath > & vg::Transcriptome::transcript_paths ( ) const

Returns transcript paths.

◆ update_haplotype_index()

void vg::Transcriptome::update_haplotype_index ( unique_ptr< gbwt::GBWT > &  haplotype_index,
const vector< Translation > &  translation 
) const
private

Update threads in gbwt index using graph translations.

◆ write_info()

int32_t vg::Transcriptome::write_info ( ostream *  tsv_ostream,
const gbwt::GBWT &  haplotype_index,
const bool  output_reference_transcripts 
) const

Writes origin info on transcripts to tsv file. Returns number of written transcripts.

◆ write_sequences()

int32_t vg::Transcriptome::write_sequences ( ostream *  fasta_ostream,
const bool  output_reference_transcripts 
) const

Writes transcript path sequences to a fasta file.
Returns number of written sequences.

◆ write_splice_graph()

void vg::Transcriptome::write_splice_graph ( ostream *  graph_ostream) const

Writes spliced variation graph to vg file.

Member Data Documentation

◆ _splice_graph

unique_ptr<MutablePathDeletableHandleGraph> vg::Transcriptome::_splice_graph
private

Spliced variation graph.

◆ _splice_graph_node_updated

bool vg::Transcriptome::_splice_graph_node_updated
private

Whether nodes in the spliced variation graph have been updated (e.g. split) since parsed.

◆ _transcript_paths

vector<CompletedTranscriptPath> vg::Transcriptome::_transcript_paths
private

Transcriptome represented by a set of transcript paths.

◆ collapse_transcript_paths

bool vg::Transcriptome::collapse_transcript_paths = true

Collapse identical transcript paths.

◆ feature_type

string vg::Transcriptome::feature_type

Feature type to parse in the gtf/gff file. Parse all types if empty.

◆ mutex_splice_graph

mutex vg::Transcriptome::mutex_splice_graph
private

◆ mutex_transcript_paths

mutex vg::Transcriptome::mutex_transcript_paths
private

◆ num_threads

int32_t vg::Transcriptome::num_threads = 1

Number of threads used for transcript path construction.

◆ transcript_tag

string vg::Transcriptome::transcript_tag

Attribute tag used to parse the transcript id/name in the gtf/gff file.

◆ use_all_paths

bool vg::Transcriptome::use_all_paths = false

Use all paths embedded in the graph for transcript path construction.

◆ use_reference_paths

bool vg::Transcriptome::use_reference_paths = false

Use reference paths embedded in the graph for transcript path construction.


The documentation for this class was generated from the following files: