vg
tools for working with variation graphs
Public Member Functions | Public Attributes | Private Member Functions | Private Attributes | List of all members
vg::Transcriptome Class Reference

#include <transcriptome.hpp>

Public Member Functions

 Transcriptome (const string &, const bool)
 
void add_transcripts (istream &transcript_stream, const gbwt::GBWT &haplotype_index)
 
const vector< TranscriptPath > & transcript_paths () const
 Returns transcript paths. More...
 
int32_t size () const
 Returns number of transcript paths. More...
 
const MutablePathDeletableHandleGraph & splice_graph () const
 Returns spliced variation graph. More...
 
void remove_non_transcribed (const bool new_reference_paths)
 
void compact_ordered ()
 Topological sort and compact graph. More...
 
void embed_transcript_paths (const bool add_reference_paths, const bool add_non_reference_paths)
 Embeds transcript paths in spliced variation graph. More...
 
void construct_gbwt (gbwt::GBWTBuilder *gbwt_builder, const bool output_reference_transcripts, const bool add_bidirectional) const
 Add transcript paths as threads in GBWT index. More...
 
void write_alignments (ostream *gam_ostream, const bool output_reference_transcripts) const
 Writes transcript paths as alignments to a gam file. More...
 
void write_sequences (ostream *fasta_ostream, const bool output_reference_transcripts)
 Writes transcript path sequences to a fasta file. More...
 
void write_info (ostream *tsv_ostream, const bool output_reference_transcripts) const
 Writes origin info on transcripts to tsv file. More...
 
void write_splice_graph (ostream *graph_ostream)
 Writes spliced variation graph to vg file. More...
 

Public Attributes

int32_t num_threads = 1
 Number of threads used for transcript path construction. More...
 
string feature_type
 Feature type to parse in the gtf/gff file. Parse all types if empty. More...
 
string transcript_tag
 Attribute tag used to parse the transcript id/name in the gtf/gff file. More...
 
bool use_embedded_paths = false
 Use all paths embedded in the graph for transcript path construction. More...
 
bool use_reference_paths = false
 Use reference paths embedded in the graph for transcript path construction. More...
 
bool collapse_transcript_paths = true
 Collapse identical transcript paths. More...
 

Private Member Functions

void add_exon (Transcript *transcript, const pair< int32_t, int32_t > &exon_pos, const PathIndex &chrom_path_index) const
 
void reorder_exons (Transcript *transcript) const
 
list< TranscriptPath > project_transcripts (const vector< Transcript > &transcripts, const gbwt::GBWT &haplotype_index, const float mean_node_length) const
 
void project_transcripts_callback (list< TranscriptPath > *proj_transcript_paths, mutex *transcript_paths_mutex, const int32_t thread_idx, const vector< Transcript > &transcripts, const gbwt::GBWT &haplotype_index, const float mean_node_length) const
 Threaded transcript projecting. More...
 
list< TranscriptPath > project_transcript_gbwt (const Transcript &cur_transcript, const gbwt::GBWT &haplotype_index, const float mean_node_length) const
 Projects transcripts onto haplotypes in a GBWT index and returns resulting transcript paths. More...
 
vector< pair< exon_nodes_t, thread_ids_t > > get_exon_haplotypes (const vg::id_t start_node, const vg::id_t end_node, const gbwt::GBWT &haplotype_index, const int32_t expected_length) const
 
list< TranscriptPath > project_transcript_embedded (const Transcript &cur_transcript) const
 Projects transcripts onto embedded paths in a variation graph and returns resulting transcript paths. More...
 
void append_transcript_paths (list< TranscriptPath > *cur_transcript_paths, list< TranscriptPath > *new_transcript_paths, const bool add_unqiue_paths_only) const
 Add new transcript paths to current set. Optionally add only unique paths. More...
 
void add_paths_to_transcriptome (list< TranscriptPath > *cur_transcript_paths)
 
bool add_novel_transcript_junctions (const list< TranscriptPath > &cur_transcript_paths)
 

Private Attributes

vector< TranscriptPath > _transcript_paths
 Transcriptome represented by a set of transcript paths. More...
 
unique_ptr< MutablePathDeletableHandleGraph > _splice_graph
 Spliced variation graph. More...
 

Detailed Description

Class that defines a transcriptome represented by a set of transcript paths.

Constructor & Destructor Documentation

◆ Transcriptome()

vg::Transcriptome::Transcriptome ( const string &  graph_filename,
const bool  show_progress 
)

Member Function Documentation

◆ add_exon()

void vg::Transcriptome::add_exon ( Transcript *  transcript,
const pair< int32_t, int32_t > &  exon_pos,
const PathIndex &  chrom_path_index 
) const
private

Finds the position of each end of an exon on a path in the
variation graph and adds the exon to a transcript.

◆ add_novel_transcript_junctions()

bool vg::Transcriptome::add_novel_transcript_junctions ( const list< TranscriptPath > &  cur_transcript_paths)
private

Adds novel splice-junctions in transcript paths that do not require node splitting to the splice graph. Returns false if a novel junction requires node splitting.

◆ add_paths_to_transcriptome()

void vg::Transcriptome::add_paths_to_transcriptome ( list< TranscriptPath > *  cur_transcript_paths)
private

Adds transcript paths to transcriptome. Augments the variation graph with transcript path splice-junctions and updates transcript path traversals to match the augmented graph if the paths contain any novel start/end sites or junctions.

◆ add_transcripts()

void vg::Transcriptome::add_transcripts ( istream &  transcript_stream,
const gbwt::GBWT &  haplotype_index 
)

Constructs transcript paths by projecting transcripts from a gtf/gff file onto embedded paths in a variation graph and/or haplotypes in a GBWT index.

◆ append_transcript_paths()

void vg::Transcriptome::append_transcript_paths ( list< TranscriptPath > *  cur_transcript_paths,
list< TranscriptPath > *  new_transcript_paths,
const bool  add_unqiue_paths_only 
) const
private

Add new transcript paths to current set. Optionally add only unique paths.

◆ compact_ordered()

void vg::Transcriptome::compact_ordered ( )

Topological sort and compact graph.

◆ construct_gbwt()

void vg::Transcriptome::construct_gbwt ( gbwt::GBWTBuilder *  gbwt_builder,
const bool  output_reference_transcripts,
const bool  add_bidirectional 
) const

Add transcript paths as threads in GBWT index.

◆ embed_transcript_paths()

void vg::Transcriptome::embed_transcript_paths ( const bool  add_reference_paths,
const bool  add_non_reference_paths 
)

Embeds transcript paths in spliced variation graph.

◆ get_exon_haplotypes()

vector< pair< exon_nodes_t, thread_ids_t > > vg::Transcriptome::get_exon_haplotypes ( const vg::id_t  start_node,
const vg::id_t  end_node,
const gbwt::GBWT &  haplotype_index,
const int32_t  expected_length 
) const
private

Extracts all unique haplotype paths between two nodes from a GBWT index and returns the resulting paths and the corresponding haplotype ids for each path.

◆ project_transcript_embedded()

list< TranscriptPath > vg::Transcriptome::project_transcript_embedded ( const Transcript &  cur_transcript) const
private

Projects transcripts onto embedded paths in a variation graph and returns resulting transcript paths.

◆ project_transcript_gbwt()

list< TranscriptPath > vg::Transcriptome::project_transcript_gbwt ( const Transcript &  cur_transcript,
const gbwt::GBWT &  haplotype_index,
const float  mean_node_length 
) const
private

Projects transcripts onto haplotypes in a GBWT index and returns resulting transcript paths.

◆ project_transcripts()

list< TranscriptPath > vg::Transcriptome::project_transcripts ( const vector< Transcript > &  transcripts,
const gbwt::GBWT &  haplotype_index,
const float  mean_node_length 
) const
private

Constructs transcript paths by projecting transcripts onto embedded paths in a variation graph and/or haplotypes in a GBWT index.

◆ project_transcripts_callback()

void vg::Transcriptome::project_transcripts_callback ( list< TranscriptPath > *  proj_transcript_paths,
mutex *  transcript_paths_mutex,
const int32_t  thread_idx,
const vector< Transcript > &  transcripts,
const gbwt::GBWT &  haplotype_index,
const float  mean_node_length 
) const
private

Threaded transcript projecting.

◆ remove_non_transcribed()

void vg::Transcriptome::remove_non_transcribed ( const bool  new_reference_paths)

Removes non-transcribed (not in transcript paths) nodes. Optionally creates new reference paths that only include transcribed nodes and edges.

◆ reorder_exons()

void vg::Transcriptome::reorder_exons ( Transcript *  transcript) const
private

Reverses exon order if the transcript is on the reverse strand and the exons are ordered in reverse.

◆ size()

int32_t vg::Transcriptome::size ( ) const

Returns number of transcript paths.

◆ splice_graph()

const MutablePathDeletableHandleGraph & vg::Transcriptome::splice_graph ( ) const

Returns spliced variation graph.

◆ transcript_paths()

const vector< TranscriptPath > & vg::Transcriptome::transcript_paths ( ) const

Returns transcript paths.

◆ write_alignments()

void vg::Transcriptome::write_alignments ( ostream *  gam_ostream,
const bool  output_reference_transcripts 
) const

Writes transcript paths as alignments to a gam file.

◆ write_info()

void vg::Transcriptome::write_info ( ostream *  tsv_ostream,
const bool  output_reference_transcripts 
) const

Writes origin info on transcripts to tsv file.

◆ write_sequences()

void vg::Transcriptome::write_sequences ( ostream *  fasta_ostream,
const bool  output_reference_transcripts 
)

Writes transcript path sequences to a fasta file.

◆ write_splice_graph()

void vg::Transcriptome::write_splice_graph ( ostream *  graph_ostream)

Writes spliced variation graph to vg file.

Member Data Documentation

◆ _splice_graph

unique_ptr<MutablePathDeletableHandleGraph> vg::Transcriptome::_splice_graph
private

Spliced variation graph.

◆ _transcript_paths

vector<TranscriptPath> vg::Transcriptome::_transcript_paths
private

Transcriptome represented by a set of transcript paths.

◆ collapse_transcript_paths

bool vg::Transcriptome::collapse_transcript_paths = true

Collapse identical transcript paths.

◆ feature_type

string vg::Transcriptome::feature_type

Feature type to parse in the gtf/gff file. Parse all types if empty.

◆ num_threads

int32_t vg::Transcriptome::num_threads = 1

Number of threads used for transcript path construction.

◆ transcript_tag

string vg::Transcriptome::transcript_tag

Attribute tag used to parse the transcript id/name in the gtf/gff file.

◆ use_embedded_paths

bool vg::Transcriptome::use_embedded_paths = false

Use all paths embedded in the graph for transcript path construction.

◆ use_reference_paths

bool vg::Transcriptome::use_reference_paths = false

Use reference paths embedded in the graph for transcript path construction.


The documentation for this class was generated from the following files: