vg
tools for working with variation graphs
|
#include <transcriptome.hpp>
Public Member Functions | |
Transcriptome (const string &, const bool) | |
void | add_transcripts (istream &transcript_stream, const gbwt::GBWT &haplotype_index) |
const vector< TranscriptPath > & | transcript_paths () const |
Returns transcript paths. More... | |
int32_t | size () const |
Returns number of transcript paths. More... | |
const MutablePathDeletableHandleGraph & | splice_graph () const |
Returns spliced variation graph. More... | |
void | remove_non_transcribed (const bool new_reference_paths) |
void | compact_ordered () |
Topological sort and compact graph. More... | |
void | embed_transcript_paths (const bool add_reference_paths, const bool add_non_reference_paths) |
Embeds transcript paths in spliced variation graph. More... | |
void | construct_gbwt (gbwt::GBWTBuilder *gbwt_builder, const bool output_reference_transcripts, const bool add_bidirectional) const |
Add transcript paths as threads in GBWT index. More... | |
void | write_alignments (ostream *gam_ostream, const bool output_reference_transcripts) const |
Writes transcript paths as alignments to a gam file. More... | |
void | write_sequences (ostream *fasta_ostream, const bool output_reference_transcripts) |
Writes transcript path sequences to a fasta file. More... | |
void | write_info (ostream *tsv_ostream, const bool output_reference_transcripts) const |
Writes origin info on transcripts to tsv file. More... | |
void | write_splice_graph (ostream *graph_ostream) |
Writes spliced variation graph to vg file. More... | |
Public Attributes | |
int32_t | num_threads = 1 |
Number of threads used for transcript path construction. More... | |
string | feature_type |
Feature type to parse in the gtf/gff file. Parse all types if empty. More... | |
string | transcript_tag |
Attribute tag used to parse the transcript id/name in the gtf/gff file. More... | |
bool | use_embedded_paths = false |
Use all paths embedded in the graph for transcript path construction. More... | |
bool | use_reference_paths = false |
Use reference paths embedded in the graph for transcript path construction. More... | |
bool | collapse_transcript_paths = true |
Collapse identical transcript paths. More... | |
Private Member Functions | |
void | add_exon (Transcript *transcript, const pair< int32_t, int32_t > &exon_pos, const PathIndex &chrom_path_index) const |
void | reorder_exons (Transcript *transcript) const |
list< TranscriptPath > | project_transcripts (const vector< Transcript > &transcripts, const gbwt::GBWT &haplotype_index, const float mean_node_length) const |
void | project_transcripts_callback (list< TranscriptPath > *proj_transcript_paths, mutex *transcript_paths_mutex, const int32_t thread_idx, const vector< Transcript > &transcripts, const gbwt::GBWT &haplotype_index, const float mean_node_length) const |
Threaded transcript projecting. More... | |
list< TranscriptPath > | project_transcript_gbwt (const Transcript &cur_transcript, const gbwt::GBWT &haplotype_index, const float mean_node_length) const |
Projects transcripts onto haplotypes in a GBWT index and returns resulting transcript paths. More... | |
vector< pair< exon_nodes_t, thread_ids_t > > | get_exon_haplotypes (const vg::id_t start_node, const vg::id_t end_node, const gbwt::GBWT &haplotype_index, const int32_t expected_length) const |
list< TranscriptPath > | project_transcript_embedded (const Transcript &cur_transcript) const |
Projects transcripts onto embedded paths in a variation graph and returns resulting transcript paths. More... | |
void | append_transcript_paths (list< TranscriptPath > *cur_transcript_paths, list< TranscriptPath > *new_transcript_paths, const bool add_unqiue_paths_only) const |
Add new transcript paths to current set. Optionally add only unique paths. More... | |
void | add_paths_to_transcriptome (list< TranscriptPath > *cur_transcript_paths) |
bool | add_novel_transcript_junctions (const list< TranscriptPath > &cur_transcript_paths) |
Private Attributes | |
vector< TranscriptPath > | _transcript_paths |
Transcriptome represented by a set of transcript paths. More... | |
unique_ptr< MutablePathDeletableHandleGraph > | _splice_graph |
Spliced variation graph. More... | |
Class that defines a transcriptome represented by a set of transcript paths.
vg::Transcriptome::Transcriptome | ( | const string & | graph_filename, |
const bool | show_progress | ||
) |
|
private |
Finds the position of each end of an exon on a path in the
variation graph and adds the exon to a transcript.
|
private |
Adds novel splice-junctions in transcript paths that do not require node splitting to the splice graph. Returns false if a novel junction requires node splitting.
|
private |
Adds transcript paths to transcriptome. Augments the variation graph with transcript path splice-junctions and updates transcript path traversals to match the augmented graph if the paths contain any novel start/end sites or junctions.
void vg::Transcriptome::add_transcripts | ( | istream & | transcript_stream, |
const gbwt::GBWT & | haplotype_index | ||
) |
Constructs transcript paths by projecting transcripts from a gtf/gff file onto embedded paths in a variation graph and/or haplotypes in a GBWT index.
|
private |
Add new transcript paths to current set. Optionally add only unique paths.
void vg::Transcriptome::compact_ordered | ( | ) |
Topological sort and compact graph.
void vg::Transcriptome::construct_gbwt | ( | gbwt::GBWTBuilder * | gbwt_builder, |
const bool | output_reference_transcripts, | ||
const bool | add_bidirectional | ||
) | const |
Add transcript paths as threads in GBWT index.
void vg::Transcriptome::embed_transcript_paths | ( | const bool | add_reference_paths, |
const bool | add_non_reference_paths | ||
) |
Embeds transcript paths in spliced variation graph.
|
private |
Extracts all unique haplotype paths between two nodes from a GBWT index and returns the resulting paths and the corresponding haplotype ids for each path.
|
private |
Projects transcripts onto embedded paths in a variation graph and returns resulting transcript paths.
|
private |
Projects transcripts onto haplotypes in a GBWT index and returns resulting transcript paths.
|
private |
Constructs transcript paths by projecting transcripts onto embedded paths in a variation graph and/or haplotypes in a GBWT index.
|
private |
Threaded transcript projecting.
void vg::Transcriptome::remove_non_transcribed | ( | const bool | new_reference_paths | ) |
Removes non-transcribed (not in transcript paths) nodes. Optionally creates new reference paths that only include transcribed nodes and edges.
|
private |
Reverses exon order if the transcript is on the reverse strand and the exons are ordered in reverse.
int32_t vg::Transcriptome::size | ( | ) | const |
Returns number of transcript paths.
const MutablePathDeletableHandleGraph & vg::Transcriptome::splice_graph | ( | ) | const |
Returns spliced variation graph.
const vector< TranscriptPath > & vg::Transcriptome::transcript_paths | ( | ) | const |
Returns transcript paths.
void vg::Transcriptome::write_alignments | ( | ostream * | gam_ostream, |
const bool | output_reference_transcripts | ||
) | const |
Writes transcript paths as alignments to a gam file.
void vg::Transcriptome::write_info | ( | ostream * | tsv_ostream, |
const bool | output_reference_transcripts | ||
) | const |
Writes origin info on transcripts to tsv file.
void vg::Transcriptome::write_sequences | ( | ostream * | fasta_ostream, |
const bool | output_reference_transcripts | ||
) |
Writes transcript path sequences to a fasta file.
void vg::Transcriptome::write_splice_graph | ( | ostream * | graph_ostream | ) |
Writes spliced variation graph to vg file.
|
private |
Spliced variation graph.
|
private |
Transcriptome represented by a set of transcript paths.
bool vg::Transcriptome::collapse_transcript_paths = true |
Collapse identical transcript paths.
string vg::Transcriptome::feature_type |
Feature type to parse in the gtf/gff file. Parse all types if empty.
int32_t vg::Transcriptome::num_threads = 1 |
Number of threads used for transcript path construction.
string vg::Transcriptome::transcript_tag |
Attribute tag used to parse the transcript id/name in the gtf/gff file.
bool vg::Transcriptome::use_embedded_paths = false |
Use all paths embedded in the graph for transcript path construction.
bool vg::Transcriptome::use_reference_paths = false |
Use reference paths embedded in the graph for transcript path construction.