21 #include "SamFilter.h" 23 #include "SamQuerySeqWithRefHelper.h" 24 #include "BaseUtilities.h" 29 double mismatchThreshold)
39 const int32_t initialLastFrontClipPos = -1;
40 int32_t lastFrontClipPos = initialLastFrontClipPos;
42 int32_t firstBackClipPos = readLength;
44 bool fromFrontComplete =
false;
45 bool fromBackComplete =
false;
46 int32_t numBasesFromFront = 0;
47 int32_t numBasesFromBack = 0;
48 int32_t numMismatchFromFront = 0;
49 int32_t numMismatchFromBack = 0;
53 while(!fromFrontComplete || !fromBackComplete)
57 while(!fromFrontComplete &&
58 ((numBasesFromFront <= numBasesFromBack) ||
64 fromFrontComplete =
true;
72 fromFrontComplete =
true;
79 if(baseMatchInfo.
getType() == SamSingleBaseMatchInfo::MISMATCH)
82 ++numMismatchFromFront;
84 double mismatchPercent =
85 (double)numMismatchFromFront / numBasesFromFront;
86 if(mismatchPercent > mismatchThreshold)
91 numBasesFromFront = 0;
92 numMismatchFromFront = 0;
99 while(!fromBackComplete &&
100 ((numBasesFromBack <= numBasesFromFront) ||
101 (fromFrontComplete)))
106 fromBackComplete =
true;
114 fromBackComplete =
true;
121 if(baseMatchInfo.
getType() == SamSingleBaseMatchInfo::MISMATCH)
124 ++numMismatchFromBack;
126 double mismatchPercent =
127 (double)numMismatchFromBack / numBasesFromBack;
128 if(mismatchPercent > mismatchThreshold)
133 numBasesFromBack = 0;
134 numMismatchFromBack = 0;
150 return(
softClip(record, lastFrontClipPos + 1, readLength - firstBackClipPos));
156 int32_t numFrontClips,
157 int32_t numBackClips)
165 status =
softClip(*cigar, numFrontClips, numBackClips,
166 startPos, updatedCigar);
192 int32_t numFrontClips,
193 int32_t numBackClips,
198 int32_t endClipPos = readLength - numBackClips;
201 if((numFrontClips != 0) || (numBackClips != 0))
206 int32_t totalClips = numFrontClips + numBackClips;
207 if(totalClips >= readLength)
219 int origCigarOpIndex = 0;
224 int32_t numPositions = 0;
227 bool onlyClips =
true;
233 while((origCigarOpIndex < oldCigar.
size()) &&
234 (numPositions < numFrontClips))
237 switch(op->operation)
256 numPositions += op->count;
268 if(numFrontClips != 0)
275 int32_t newCount = numPositions - numFrontClips;
283 if(numPositions > endClipPos)
285 newCount -= (numPositions - endClipPos);
289 updatedCigar.
Add(op->operation, newCount);
307 while((origCigarOpIndex < oldCigar.
size()) &&
308 (numPositions <= endClipPos))
337 uint32_t numPosTilClip = endClipPos - numPositions;
339 if(numPosTilClip < op->count)
343 if(numPosTilClip != 0)
345 updatedCigar.
Add(op->operation,
364 numPositions += op->count;
373 if(numBackClips != 0)
381 while(origCigarOpIndex < oldCigar.
size())
407 if(numFrontClips > 0)
415 int32_t lastFrontClipPos = numFrontClips - 1;
421 startPos = newStartPos + 1;
432 uint32_t qualityThreshold,
433 uint8_t defaultQualityInt)
435 uint32_t totalMismatchQuality =
440 if(totalMismatchQuality > qualityThreshold)
453 uint8_t defaultQualityInt)
456 int mismatchQual = 0;
464 if(baseMatchInfo.
getType() == SamSingleBaseMatchInfo::MISMATCH)
467 char readQualityChar =
469 uint8_t readQualityInt =
475 readQualityInt = defaultQualityInt;
477 mismatchQual += readQualityInt;
482 return(mismatchQual);
489 uint16_t flag = record.
getFlag();
493 flag &= ~
SamFlag::SECONDARY_ALIGNMENT;
494 flag &= ~
SamFlag::SUPPLEMENTARY_ALIGNMENT;
int32_t getRefPosition(int32_t queryIndex, int32_t queryStartPos)
Return the reference position associated with the specified query index or INDEX_NA based on this cig...
insertion to the reference (the query sequence contains bases that have no corresponding base in the ...
static bool isClip(Operation op)
Return true if the specified operation is a clipping operation, false if not.
void Add(Operation operation, int count)
Append the specified operation with the specified count to this object.
Filtering clipped the read.
static uint32_t sumMismatchQuality(SamRecord &record, GenomeSequence &refSequence, uint8_t defaultQualityInt)
Get the sum of the qualities of all mismatches in the record.
skipped region from the reference (the reference contains bases that have no corresponding base in th...
bool setMapQuality(uint8_t mapQuality)
Set the mapping quality (MAPQ).
no operation has been set.
The filter did not affect the read.
Cigar * getCigarInfo()
Returns a pointer to the Cigar object associated with this record.
int size() const
Return the number of cigar operations.
static const int32_t INDEX_NA
Value associated with an index that is not applicable/does not exist, used for converting between que...
static void setUnmapped(uint16_t &flag)
Mark the passed in flag as unmapped.
Type getType()
Get the type (match/mismatch/unknown) for this object.
Hard clip on the read (clipped sequence not present in the query sequence or reference). Associated with CIGAR Operation "H".
int32_t getReadLength()
Get the length of the read.
static bool foundInQuery(Operation op)
Return true if the specified operation is found in the query sequence, false if not.
bool getNextMatchMismatch(SamSingleBaseMatchInfo &matchMismatchInfo)
Returns information for the next position where the query and the reference match or mismatch...
This class represents the CIGAR without any methods to set the cigar (see CigarRoller for that)...
Iterates through the query and compare with reference.
static void filterRead(SamRecord &record)
Filter the read by marking it as unmapped.
const char * getQuality()
Returns the SAM formatted quality string (QUAL).
static FilterStatus filterOnMismatchQuality(SamRecord &record, GenomeSequence &refSequence, uint32_t qualityThreshold, uint8_t defaultQualityInt)
Filter the read based on the specified quality threshold.
bool setFlag(uint16_t flag)
Set the bitwise FLAG to the specified value.
mismatch operation. Associated with CIGAR Operation "M"
Filtering caused the read to be modified to unmapped.
Soft clip on the read (clipped sequence present in the query sequence, but not in reference)...
match/mismatch operation. Associated with CIGAR Operation "M"
static uint8_t getPhredBaseQuality(char charQuality)
Get phred base quality from the specified ascii quality.
Class for extracting information from a SAM Flag.
int32_t getQueryIndex()
Get the query index for this object.
static const uint8_t UNKNOWN_QUALITY_INT
Int value used when the quality is unknown.
deletion from the reference (the reference contains bases that have no corresponding base in the quer...
bool setCigar(const char *cigar)
Set the CIGAR to the specified SAM formatted cigar string.
int getExpectedQueryBaseCount() const
Return the length of the read that corresponds to the current CIGAR string.
Create/Access/Modify/Load Genome Sequences stored as binary mapped files.
FilterStatus
Enum describing what sort of filtering was done.
const CigarOperator & getOperator(int i) const
Return the Cigar Operation at the specified index (starting at 0).
static FilterStatus clipOnMismatchThreshold(SamRecord &record, GenomeSequence &refSequence, double mismatchThreshold)
Clip the read based on the specified mismatch threshold.
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record...
uint16_t getFlag()
Get the flag (FLAG).
bool set0BasedPosition(int32_t position)
Set the leftmost position using the specified 0-based (BAM format) value.
static FilterStatus softClip(SamRecord &record, int32_t numFrontClips, int32_t numBackClips)
Soft clip the record from the front and/or the back.
The purpose of this class is to provide accessors for setting, updating, modifying the CIGAR object...
This class contains the match/mismatch information between the reference and a read for a single base...
int32_t get0BasedPosition()
Get the 0-based(BAM) leftmost position of the record.