#include <constructor.hpp>
|
ConstructedChunk | construct_chunk (string reference_sequence, string reference_path_name, vector< vcflib::Variant > variants, size_t chunk_offset) const |
|
void | construct_graph (string vcf_contig, FastaReference &reference, VcfBuffer &variant_source, const vector< FastaReference * > &insertion, const function< void(Graph &)> &callback) |
|
void | construct_graph (const vector< FastaReference * > &references, const vector< vcflib::VariantCallFile * > &variant_files, const vector< FastaReference * > &insertions, const function< void(Graph &)> &callback) |
|
void | construct_graph (const vector< string > &reference_filenames, const vector< string > &variant_filenames, const vector< string > &insertion_filenames, const function< void(Graph &)> &callback) |
|
void | construct_graph (const vector< FastaReference * > &references, const vector< vcflib::VariantCallFile * > &variant_files, const vector< FastaReference * > &insertions, MutablePathMutableHandleGraph *destination) |
|
void | construct_graph (const vector< string > &reference_filenames, const vector< string > &variant_filenames, const vector< string > &insertion_filenames, MutablePathMutableHandleGraph *destination) |
|
void | preload_progress (const string &message) |
|
void | create_progress (const string &message, long count) |
|
void | create_progress (long count) |
|
void | update_progress (long i) |
|
void | increment_progress () |
|
void | destroy_progress (void) |
|
void | add_name_mapping (const string &vcf_name, const string &fasta_name) |
|
string | vcf_to_fasta (const string &vcf_name) const |
|
string | fasta_to_vcf (const string &fasta_name) const |
|
|
static void | trim_to_variable (vector< list< vcflib::VariantAllele >> &parsed_alleles) |
|
static void | condense_edits (list< vcflib::VariantAllele > &parsed_allele) |
|
static pair< int64_t, int64_t > | get_bounds (const vector< list< vcflib::VariantAllele >> &trimmed_variant) |
|
static pair< int64_t, int64_t > | get_symbolic_bounds (vcflib::Variant var) |
|
◆ condense_edits()
void vg::Constructor::condense_edits |
( |
list< vcflib::VariantAllele > & |
parsed_allele | ) |
|
|
static private |
Given a list of VariantAllele edits, condense adjacent perfect match edits to be maximally long.
◆ construct_chunk()
ConstructedChunk vg::Constructor::construct_chunk |
( |
string |
reference_sequence, |
|
|
string |
reference_path_name, |
|
|
vector< vcflib::Variant > |
variants, |
|
|
size_t |
chunk_offset |
|
) |
| const |
Construct a ConstructedChunk of graph from the given piece of sequence, with the given name, applying the given variants. The variants need to be sorted by start position, and have their start positions set to be ZERO-BASED. However, they also need to have their start positions relative to the global start of the contig, so that hash-based names come out right for them. They also need to not overlap with any variants not in the vector we have (i.e. we need access to all overlapping variants for this region). The variants must not extend beyond the given sequence, though they can abut its edges.
Variants in the vector may not use symbolic alleles.
chunk_offset gives the global 0-based position at which this chunk starts in the reference contig it is part of, which is used to correctly place variants.
◆ construct_graph() [1/5]
void vg::Constructor::construct_graph |
( |
const vector< FastaReference * > & |
references, |
|
|
const vector< vcflib::VariantCallFile * > & |
variant_files, |
|
|
const vector< FastaReference * > & |
insertions, |
|
|
const function< void(Graph &)> & |
callback |
|
) |
| |
Construct a graph using the given FASTA references and VCFlib VCF files. The VCF files are assumed to be grouped by contig and then sorted by position within the contig, such that each contig is present in only one file. If multiple FASTAs are used, each contig must be present in only one FASTA file. Reference and VCF vectors may not contain nulls.
insertions contains FASTAs containing sequences for resolving symbolic insert alleles in the VCFs.
Calls the given callback with constructed graph chunks, eventually (hopefully) in multiple threads. Chunks may contain dangling edges into the next chunk.
◆ construct_graph() [2/5]
void vg::Constructor::construct_graph |
( |
const vector< FastaReference * > & |
references, |
|
|
const vector< vcflib::VariantCallFile * > & |
variant_files, |
|
|
const vector< FastaReference * > & |
insertions, |
|
|
MutablePathMutableHandleGraph * |
destination |
|
) |
| |
Construct a graph using the given FASTA references and VCFlib VCF files. The VCF files are assumed to be grouped by contig and then sorted by position within the contig, such that each contig is present in only one file. If multiple FASTAs are used, each contig must be present in only one FASTA file. Reference and VCF vectors may not contain nulls.
insertions contains FASTAs containing sequences for resolving symbolic insert alleles in the VCFs.
Builds the graph into the given mutable graph object, which may not be thread safe.
◆ construct_graph() [3/5]
void vg::Constructor::construct_graph |
( |
const vector< string > & |
reference_filenames, |
|
|
const vector< string > & |
variant_filenames, |
|
|
const vector< string > & |
insertion_filenames, |
|
|
const function< void(Graph &)> & |
callback |
|
) |
| |
Construct a graph using the given FASTA references and VCF files on disk. The VCF files are assumed to be grouped by contig and then sorted by position within the contig, such that each contig is present in only one file. If multiple FASTAs are used, each contig must be present in only one FASTA file.
insertion_filenames contains the filenames of FASTAs containing sequences for resolving symbolic insert alleles in the VCFs.
Calls the given callback with constructed graph chunks, eventually (hopefully) in multiple threads. Chunks may contain dangling edges into the next chunk.
◆ construct_graph() [4/5]
void vg::Constructor::construct_graph |
( |
const vector< string > & |
reference_filenames, |
|
|
const vector< string > & |
variant_filenames, |
|
|
const vector< string > & |
insertion_filenames, |
|
|
MutablePathMutableHandleGraph * |
destination |
|
) |
| |
Construct a graph using the given FASTA references and VCF files on disk. The VCF files are assumed to be grouped by contig and then sorted by position within the contig, such that each contig is present in only one file. If multiple FASTAs are used, each contig must be present in only one FASTA file.
insertion_filenames contains the filenames of FASTAs containing sequences for resolving symbolic insert alleles in the VCFs.
Builds the graph into the given mutable graph object, which may not be thread safe.
◆ construct_graph() [5/5]
void vg::Constructor::construct_graph |
( |
string |
vcf_contig, |
|
|
FastaReference & |
reference, |
|
|
VcfBuffer & |
variant_source, |
|
|
const vector< FastaReference * > & |
insertion, |
|
|
const function< void(Graph &)> & |
callback |
|
) |
| |
Construct a graph for the given VCF contig name, using the given reference and the variants from the given buffered VCF file. Emits a sequence of Graph chunks, which may be too big to serialize directly.
Doesn't handle any of the setup for VCF indexing. Just scans all the variants that can come out of the buffer, so make sure indexing is set on the file first before passing it in.
insertion contains FASTAs containing sequences for resolving symbolic insert alleles in the VCF.
Calls the given callback with constructed graph chunks, in a single thread. Chunks may contain dangling edges into the next chunk.
◆ get_bounds()
pair< int64_t, int64_t > vg::Constructor::get_bounds |
( |
const vector< list< vcflib::VariantAllele >> & |
trimmed_variant | ) |
|
|
static private |
Given a vector of lists of VariantAllele edits that have been trimmed with trim_to_variable() above, one per non-reference alt for a variant, return the position of the first variable base, and the position of the last variable base. If there's no variable region, the result is max int64_t and -1, and if there's a 0-length variable region, the result is the base after it and the base before it.
◆ get_symbolic_bounds()
pair< int64_t, int64_t > vg::Constructor::get_symbolic_bounds |
( |
vcflib::Variant |
var | ) |
|
|
static private |
Given a symbolic variant, check its bounds and return them. This function is needed to handle SVs properly, since they won't always have their ref and alt fields put in. Note that insertions may have an end bound before their start, because the anchoring base isn't included.
◆ trim_to_variable()
void vg::Constructor::trim_to_variable |
( |
vector< list< vcflib::VariantAllele >> & |
parsed_alleles | ) |
|
|
static private |
Given a vector of lists of VariantAllele edits, trim in from the left and right, leaving a core of edits bounded by edits that actually change the reference in at least one allele.
Postcondition: either all lists of VariantAlleles are empty, or at least one begins with a non-match and at least one ends with a non-match. Adjacent edits in the list abut; there are no uncovered gaps in the edits. This means that internal perfect match edits will be preserved.
◆ allowed_vcf_names
set<string> vg::Constructor::allowed_vcf_names |
◆ allowed_vcf_regions
map<string, pair<size_t, size_t> > vg::Constructor::allowed_vcf_regions |
◆ alt_paths
bool vg::Constructor::alt_paths = false |
◆ alts_as_loci
bool vg::Constructor::alts_as_loci = false |
◆ ambiguous_warned_sequences
unordered_set<string> vg::Constructor::ambiguous_warned_sequences |
|
mutable private |
What sequences have we warned about containing unsupported ambiguity codes?
◆ bases_per_chunk
size_t vg::Constructor::bases_per_chunk = 1024 * 1024 |
◆ chain_deletions
bool vg::Constructor::chain_deletions = true |
◆ do_svs
bool vg::Constructor::do_svs = false |
◆ flat
bool vg::Constructor::flat = false |
◆ greedy_pieces
bool vg::Constructor::greedy_pieces = false |
◆ lowercase_warned_alt
bool vg::Constructor::lowercase_warned_alt = false |
|
mutable private |
Have we given a warning yet about lowercase alt alleles?
◆ lowercase_warned_sequences
unordered_set<string> vg::Constructor::lowercase_warned_sequences |
|
mutable private |
What sequences have we warned about containing lowercase characters?
◆ max_id
id_t vg::Constructor::max_id = 0 |
|
protected |
All chunks are generated with IDs starting at 1, but graphs emitted from construct_graph need to have the IDs rewritten so they don't overlap. Moreover, multiple calls to construct_graph need to not have conflicting IDs, because some construct_graph implementations call other ones. What we do for now is globally track the max ID already used, so all calls to construct_graph follow a single ID ordering.
◆ max_node_size
size_t vg::Constructor::max_node_size = 1000 |
◆ symbolic_allele_warnings
set<string> vg::Constructor::symbolic_allele_warnings |
|
protected |
Remembers which unusable symbolic alleles we've already emitted a warning about during construction.
◆ trim_indels
bool vg::Constructor::trim_indels = true |
◆ vars_per_chunk
size_t vg::Constructor::vars_per_chunk = 1024 |
◆ warn_on_ambiguous
bool vg::Constructor::warn_on_ambiguous = true |
◆ warn_on_lowercase
bool vg::Constructor::warn_on_lowercase = true |
The documentation for this class was generated from the following files: