20 #include "SamQuerySeqWithRefHelper.h" 21 #include "BaseUtilities.h" 24 SamQuerySeqWithRefIter::SamQuerySeqWithRefIter(
SamRecord& record,
28 myRefSequence(refSequence),
30 myStartOfReadOnRefIndex(INVALID_GENOME_INDEX),
34 myCigar = myRecord.getCigarInfo();
35 myStartOfReadOnRefIndex =
38 if(myStartOfReadOnRefIndex != INVALID_GENOME_INDEX)
42 myStartOfReadOnRefIndex += myRecord.get0BasedPosition();
47 myQueryIndex = myRecord.getReadLength() - 1;
52 SamQuerySeqWithRefIter::~SamQuerySeqWithRefIter()
60 myCigar = myRecord.getCigarInfo();
69 myStartOfReadOnRefIndex =
70 myRefSequence.getGenomePosition(myRecord.getReferenceName());
72 if(myStartOfReadOnRefIndex != INVALID_GENOME_INDEX)
76 myStartOfReadOnRefIndex += myRecord.get0BasedPosition();
88 myQueryIndex = myRecord.getReadLength() - 1;
106 if(!SamFlag::isMapped(myRecord.getFlag()))
116 throw(std::runtime_error(
"Cannot determine matches/mismatches since failed to retrieve the cigar"));
122 if(myStartOfReadOnRefIndex == INVALID_GENOME_INDEX)
134 while((myQueryIndex < myRecord.getReadLength()) &&
140 int32_t refOffset = myCigar->getRefOffset(myQueryIndex);
152 char refBase = myRefSequence[myStartOfReadOnRefIndex + refOffset];
173 matchMismatchInfo.
setType(SamSingleBaseMatchInfo::MATCH);
181 matchMismatchInfo.
setType(SamSingleBaseMatchInfo::MISMATCH);
193 void SamQuerySeqWithRefIter::nextIndex()
206 SamSingleBaseMatchInfo::SamSingleBaseMatchInfo()
213 SamSingleBaseMatchInfo::~SamSingleBaseMatchInfo()
226 return(myQueryIndex);
238 myQueryIndex = queryIndex;
244 int32_t seq0BasedPos,
246 const char* referenceName,
248 std::string& updatedSeq)
250 updatedSeq = currentSeq;
252 int32_t seqLength = updatedSeq.length();
253 int32_t queryIndex = 0;
255 uint32_t startOfReadOnRefIndex =
258 if(startOfReadOnRefIndex == INVALID_GENOME_INDEX)
264 startOfReadOnRefIndex += seq0BasedPos;
267 while(queryIndex < seqLength)
276 char readBase = currentSeq[queryIndex];
277 char refBase = refSequence[startOfReadOnRefIndex + refOffset];
286 updatedSeq[queryIndex] =
'=';
297 int32_t seq0BasedPos,
299 const char* referenceName,
301 std::string& updatedSeq)
303 updatedSeq = currentSeq;
305 int32_t seqLength = updatedSeq.length();
306 int32_t queryIndex = 0;
308 uint32_t startOfReadOnRefIndex =
311 if(startOfReadOnRefIndex == INVALID_GENOME_INDEX)
317 startOfReadOnRefIndex += seq0BasedPos;
320 while(queryIndex < seqLength)
329 char readBase = currentSeq[queryIndex];
330 char refBase = refSequence[startOfReadOnRefIndex + refOffset];
339 updatedSeq[queryIndex] = refBase;
static void seqWithEquals(const char *currentSeq, int32_t seq0BasedPos, Cigar &cigar, const char *referenceName, const GenomeSequence &refSequence, std::string &updatedSeq)
Gets the sequence with '=' in any position where the sequence matches the reference.
bool reset(bool forward=true)
Reset to start at the beginning of the record.
void setType(Type newType)
Set the type (match/mismatch/unkown) for this object.
void setQueryIndex(int32_t queryIndex)
Set the query index for this object.
static bool areEqual(char base1, char base2)
Returns whether or not two bases are equal (case insensitive), if one of the bases is '='...
static const int32_t INDEX_NA
Value associated with an index that is not applicable/does not exist, used for converting between que...
Type getType()
Get the type (match/mismatch/unknown) for this object.
bool getNextMatchMismatch(SamSingleBaseMatchInfo &matchMismatchInfo)
Returns information for the next position where the query and the reference match or mismatch...
This class represents the CIGAR without any methods to set the cigar (see CigarRoller for that)...
int32_t getRefOffset(int32_t queryIndex)
Return the reference offset associated with the specified query index or INDEX_NA based on this cigar...
Leave the sequence as is.
int32_t getQueryIndex()
Get the query index for this object.
Type
More types can be added later as needed.
static bool isAmbiguous(char base)
Returns whether or not the specified bases is an indicator for ambiguity.
Create/Access/Modify/Load Genome Sequences stored as binary mapped files.
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record...
static void seqWithoutEquals(const char *currentSeq, int32_t seq0BasedPos, Cigar &cigar, const char *referenceName, const GenomeSequence &refSequence, std::string &updatedSeq)
Gets the sequence converting '=' to the appropriate base using the reference.
This class contains the match/mismatch information between the reference and a read for a single base...
genomeIndex_t getGenomePosition(const char *chromosomeName, unsigned int chromosomeIndex) const
given a chromosome name and position, return the genome position