vg
tools for working with variation graphs
|
#include <transcriptome.hpp>
Public Member Functions | |
Transcriptome (const string &, const bool) | |
int32_t | add_intron_splice_junctions (istream &intron_stream, unique_ptr< gbwt::GBWT > &haplotype_index) |
int32_t | add_transcript_splice_junctions (istream &transcript_stream, unique_ptr< gbwt::GBWT > &haplotype_index) |
int32_t | add_transcripts (istream &transcript_stream, const gbwt::GBWT &haplotype_index) |
const vector< CompletedTranscriptPath > & | transcript_paths () const |
Returns transcript paths. More... | |
int32_t | size () const |
Returns number of transcript paths. More... | |
const MutablePathDeletableHandleGraph & | splice_graph () const |
Returns spliced variation graph. More... | |
bool | splice_graph_node_updated () const |
void | remove_non_transcribed (const bool new_reference_paths) |
void | compact_ordered () |
Topological sort and compact graph. More... | |
int32_t | embed_transcript_paths (const bool add_reference_paths, const bool add_non_reference_paths) |
int32_t | add_transcripts_to_gbwt (gbwt::GBWTBuilder *gbwt_builder, const bool output_reference_transcripts, const bool add_bidirectional) const |
int32_t | write_sequences (ostream *fasta_ostream, const bool output_reference_transcripts) const |
int32_t | write_info (ostream *tsv_ostream, const gbwt::GBWT &haplotype_index, const bool output_reference_transcripts) const |
void | write_splice_graph (ostream *graph_ostream) const |
Writes spliced variation graph to vg file. More... | |
Public Attributes | |
int32_t | num_threads = 1 |
Number of threads used for transcript path construction. More... | |
string | feature_type |
Feature type to parse in the gtf/gff file. Parse all types if empty. More... | |
string | transcript_tag |
Attribute tag used to parse the transcript id/name in the gtf/gff file. More... | |
bool | use_all_paths = false |
Use all paths embedded in the graph for transcript path construction. More... | |
bool | use_reference_paths = false |
Use reference paths embedded in the graph for transcript path construction. More... | |
bool | collapse_transcript_paths = true |
Collapse identical transcript paths. More... | |
Private Member Functions | |
vector< Transcript > | parse_introns (istream &intron_stream, const bdsg::PositionOverlay &graph_path_pos_overlay) const |
Parse BED file of introns. More... | |
vector< Transcript > | parse_transcripts (istream &transcript_stream, const bdsg::PositionOverlay &graph_path_pos_overlay) const |
Parse gtf/gff3 file of transcripts. More... | |
float | mean_node_length () const |
Returns the mean node length of the graph. More... | |
void | add_exon (Transcript *transcript, const pair< int32_t, int32_t > &exon_pos, const bdsg::PositionOverlay &graph_path_pos_overlay) const |
void | reorder_exons (Transcript *transcript) const |
list< EditedTranscriptPath > | construct_edited_transcript_paths (const vector< Transcript > &transcripts, const bdsg::PositionOverlay &graph_path_pos_overlay) const |
void | construct_edited_transcript_paths_callback (list< EditedTranscriptPath > *edited_transcript_paths, mutex *edited_transcript_paths_mutex, const bdsg::PositionOverlay &graph_path_pos_overlay, const int32_t thread_idx, const vector< Transcript > &transcripts) const |
Threaded edited transcript path construction. More... | |
void | project_and_add_transcripts (const vector< Transcript > &transcripts, const gbwt::GBWT &haplotype_index, const bdsg::PositionOverlay &graph_path_pos_overlay, const float mean_node_length) |
void | project_and_add_transcripts_callback (const int32_t thread_idx, const vector< Transcript > &transcripts, const gbwt::GBWT &haplotype_index, const bdsg::PositionOverlay &graph_path_pos_overlay, const float mean_node_length) |
Threaded transcript projecting. More... | |
list< EditedTranscriptPath > | project_transcript_gbwt (const Transcript &cur_transcript, const gbwt::GBWT &haplotype_index, const float mean_node_length) const |
Projects transcripts onto haplotypes in a GBWT index and returns resulting transcript paths. More... | |
vector< pair< exon_nodes_t, thread_ids_t > > | get_exon_haplotypes (const vg::id_t start_node, const vg::id_t end_node, const gbwt::GBWT &haplotype_index, const int32_t expected_length) const |
list< EditedTranscriptPath > | project_transcript_embedded (const Transcript &cur_transcript, const bdsg::PositionOverlay &graph_path_pos_overlay, const bool reference_only) const |
Projects transcripts onto embedded paths in a variation graph and returns resulting transcript paths. More... | |
void | append_transcript_paths (list< CompletedTranscriptPath > *completed_transcript_path, list< CompletedTranscriptPath > *new_completed_transcript_paths, const bool add_unqiue_paths_only) const |
Adds new transcript paths to current set. Has argument to only add unique paths. More... | |
list< CompletedTranscriptPath > | construct_completed_transcript_paths (const list< EditedTranscriptPath > &edited_transcript_paths) const |
vector< handle_t > | path_to_handles (const Path &path) const |
bool | has_novel_exon_boundaries (const list< EditedTranscriptPath > &edited_transcript_paths, const bool include_transcript_ends) const |
void | augment_splice_graph (list< EditedTranscriptPath > *edited_transcript_paths, unique_ptr< gbwt::GBWT > &haplotype_index, const bool break_at_transcript_ends) |
void | update_haplotype_index (unique_ptr< gbwt::GBWT > &haplotype_index, const vector< Translation > &translation) const |
Update threads in gbwt index using graph translations. More... | |
void | add_splice_junction_edges (const list< EditedTranscriptPath > &edited_transcript_paths) |
Adds transcript path splice-junction edges to splice graph. More... | |
void | add_splice_junction_edges (const vector< CompletedTranscriptPath > &completed_transcript_paths) |
Private Attributes | |
vector< CompletedTranscriptPath > | _transcript_paths |
Transcriptome represented by a set of transcript paths. More... | |
mutex | mutex_transcript_paths |
unique_ptr< MutablePathDeletableHandleGraph > | _splice_graph |
Spliced variation graph. More... | |
mutex | mutex_splice_graph |
bool | _splice_graph_node_updated |
Class that defines a transcriptome represented by a set of transcript paths.
vg::Transcriptome::Transcriptome | ( | const string & | graph_filename, |
const bool | show_progress | ||
) |
|
private |
Finds the position of each end of an exon on a path in the
variation graph and adds the exon to a transcript.
int32_t vg::Transcriptome::add_intron_splice_junctions | ( | istream & | intron_stream, |
unique_ptr< gbwt::GBWT > & | haplotype_index | ||
) |
Add splice-junctions from an intron BED file. Returns number of parsed introns.
|
private |
Adds transcript path splice-junction edges to splice graph.
|
private |
int32_t vg::Transcriptome::add_transcript_splice_junctions | ( | istream & | transcript_stream, |
unique_ptr< gbwt::GBWT > & | haplotype_index | ||
) |
Add splice-junctions from a transcript gtf/gff3 file. Returns number of parsed transcripts.
int32_t vg::Transcriptome::add_transcripts | ( | istream & | transcript_stream, |
const gbwt::GBWT & | haplotype_index | ||
) |
Constructs transcript paths by projecting transcripts from a gtf/gff file onto embedded paths in a variation graph and/or haplotypes in a GBWT index. Augments graph with transcriptome splice-junctions. Returns number of transcript paths added.
int32_t vg::Transcriptome::add_transcripts_to_gbwt | ( | gbwt::GBWTBuilder * | gbwt_builder, |
const bool | output_reference_transcripts, | ||
const bool | add_bidirectional | ||
) | const |
Add transcript paths as threads in GBWT index. Returns number of added threads.
|
private |
Adds new transcript paths to current set. Has argument to only add unique paths.
|
private |
Augments the variation graph with transcript path exon boundaries and splice-junctions. Updates threads in gbwt index to match the augmented graph.
void vg::Transcriptome::compact_ordered | ( | ) |
Topological sort and compact graph.
|
private |
Constructs completed transcript paths from edited transcript paths. Checks that the paths contain no edits compared to the graph.
|
private |
Constructs edited transcript paths from a set of reference transcripts.
|
private |
Threaded edited transcript path construction.
int32_t vg::Transcriptome::embed_transcript_paths | ( | const bool | add_reference_paths, |
const bool | add_non_reference_paths | ||
) |
Embeds transcript paths in spliced variation graph.
Returns number of paths embedded.
|
private |
Extracts all unique haplotype paths between two nodes from a GBWT index and returns the resulting paths and the corresponding haplotype ids for each path.
|
private |
Checks whether a transcript path consists only of whole nodes (i.e. is complete).
|
private |
Returns the mean node length of the graph.
|
private |
Parse BED file of introns.
|
private |
Parse gtf/gff3 file of transcripts.
Convert a path to a vector of handles. Checks that the path is complete (i.e. only consists of whole nodes).
|
private |
Constructs transcript paths by projecting transcripts onto embedded paths in a variation graph and/or haplotypes in a GBWT index.
|
private |
Threaded transcript projecting.
|
private |
Projects transcripts onto embedded paths in a variation graph and returns resulting transcript paths.
|
private |
Projects transcripts onto haplotypes in a GBWT index and returns resulting transcript paths.
void vg::Transcriptome::remove_non_transcribed | ( | const bool | new_reference_paths | ) |
Removes non-transcribed (not in transcript paths) nodes. Optionally creates new reference paths that only include transcribed nodes and edges.
|
private |
Reverses exon order if the transcript is on the reverse strand and the exons are ordered in reverse.
int32_t vg::Transcriptome::size | ( | ) | const |
Returns number of transcript paths.
const MutablePathDeletableHandleGraph & vg::Transcriptome::splice_graph | ( | ) | const |
Returns spliced variation graph.
bool vg::Transcriptome::splice_graph_node_updated | ( | ) | const |
Returns true if nodes in the spliced variation graph have been updated (e.g. split) since parsed.
const vector< CompletedTranscriptPath > & vg::Transcriptome::transcript_paths | ( | ) | const |
Returns transcript paths.
|
private |
Update threads in gbwt index using graph translations.
int32_t vg::Transcriptome::write_info | ( | ostream * | tsv_ostream, |
const gbwt::GBWT & | haplotype_index, | ||
const bool | output_reference_transcripts | ||
) | const |
Writes origin info on transcripts to tsv file. Returns number of written transcripts.
int32_t vg::Transcriptome::write_sequences | ( | ostream * | fasta_ostream, |
const bool | output_reference_transcripts | ||
) | const |
Writes transcript path sequences to a fasta file.
Returns number of written sequences.
void vg::Transcriptome::write_splice_graph | ( | ostream * | graph_ostream | ) | const |
Writes spliced variation graph to vg file.
|
private |
Spliced variation graph.
|
private |
Whether nodes in the spliced variation graph have been updated (e.g. split) since parsing.
|
private |
Transcriptome represented by a set of transcript paths.
bool vg::Transcriptome::collapse_transcript_paths = true |
Collapse identical transcript paths.
string vg::Transcriptome::feature_type |
Feature type to parse in the gtf/gff file. Parse all types if empty.
|
private |
|
private |
int32_t vg::Transcriptome::num_threads = 1 |
Number of threads used for transcript path construction.
string vg::Transcriptome::transcript_tag |
Attribute tag used to parse the transcript id/name in the gtf/gff file.
bool vg::Transcriptome::use_all_paths = false |
Use all paths embedded in the graph for transcript path construction.
bool vg::Transcriptome::use_reference_paths = false |
Use reference paths embedded in the graph for transcript path construction.