libStatGen Software 1
Loading...
Searching...
No Matches
SamQuerySeqWithRef Class Reference

Contains methods for converting between the query sequence and reference. More...

#include <SamQuerySeqWithRefHelper.h>

Static Public Member Functions

static void seqWithEquals (const char *currentSeq, int32_t seq0BasedPos, Cigar &cigar, const char *referenceName, const GenomeSequence &refSequence, std::string &updatedSeq)
 Gets the sequence with '=' in any position where the sequence matches the reference.
 
static void seqWithoutEquals (const char *currentSeq, int32_t seq0BasedPos, Cigar &cigar, const char *referenceName, const GenomeSequence &refSequence, std::string &updatedSeq)
 Gets the sequence converting '=' to the appropriate base using the reference.
 

Detailed Description

Contains methods for converting between the query sequence and reference.

Definition at line 101 of file SamQuerySeqWithRefHelper.h.

Member Function Documentation

◆ seqWithEquals()

void SamQuerySeqWithRef::seqWithEquals ( const char *  currentSeq,
int32_t  seq0BasedPos,
Cigar cigar,
const char *  referenceName,
const GenomeSequence refSequence,
std::string &  updatedSeq 
)
static

Gets the sequence with '=' in any position where the sequence matches the reference.


NOTE: 'N' in both the sequence and the reference is not considered a match.

Parameters
currentSeqsequence that should be converted
seq0BasedPos0 based start position of currentSeq on the reference.
cigarcigar string for currentSeq (used for determining how the sequence aligns to the reference)
referenceNamereference name associated with this sequence
refSequencereference sequence object
updatedSeqreturn parameter that this method sets to the current sequence, replacing any matches to the reference with '='.

Definition at line 243 of file SamQuerySeqWithRefHelper.cpp.

249{
250 updatedSeq = currentSeq;
251
252 int32_t seqLength = updatedSeq.length();
253 int32_t queryIndex = 0;
254
255 uint32_t startOfReadOnRefIndex =
256 refSequence.getGenomePosition(referenceName);
257
258 if(startOfReadOnRefIndex == INVALID_GENOME_INDEX)
259 {
260 // This reference name was not found in the reference file, so just
261 // return.
262 return;
263 }
264 startOfReadOnRefIndex += seq0BasedPos;
265
266 // Loop until the entire sequence has been updated.
267 while(queryIndex < seqLength)
268 {
269 // Still more bases, look for matches.
270
271 // Get the reference offset for this read position.
272 int32_t refOffset = cigar.getRefOffset(queryIndex);
273 if(refOffset != Cigar::INDEX_NA)
274 {
275 // Both the reference and the read have a base, so get the bases.
276 char readBase = currentSeq[queryIndex];
277 char refBase = refSequence[startOfReadOnRefIndex + refOffset];
278
279 // If neither base is unknown and they are the same, count it
280 // as a match.
281 if(!BaseUtilities::isAmbiguous(readBase) &&
282 !BaseUtilities::isAmbiguous(refBase) &&
283 (BaseUtilities::areEqual(readBase, refBase)))
284 {
285 // Match.
286 updatedSeq[queryIndex] = '=';
287 }
288 }
289 // Increment the query index to the next position.
290 ++queryIndex;
291 continue;
292 }
293}
static bool isAmbiguous(char base)
Returns whether or not the specified bases is an indicator for ambiguity.
static bool areEqual(char base1, char base2)
Returns whether or not two bases are equal (case insensitive), if one of the bases is '=',...
static const int32_t INDEX_NA
Value associated with an index that is not applicable/does not exist, used for converting between que...
Definition Cigar.h:492
int32_t getRefOffset(int32_t queryIndex)
Return the reference offset associated with the specified query index or INDEX_NA based on this cigar...
Definition Cigar.cpp:187
genomeIndex_t getGenomePosition(const char *chromosomeName, unsigned int chromosomeIndex) const
given a chromosome name and position, return the genome position

References BaseUtilities::areEqual(), GenomeSequence::getGenomePosition(), Cigar::getRefOffset(), Cigar::INDEX_NA, and BaseUtilities::isAmbiguous().

Referenced by SamRecord::getSequence(), and SamRecord::getSequence().

◆ seqWithoutEquals()

void SamQuerySeqWithRef::seqWithoutEquals ( const char *  currentSeq,
int32_t  seq0BasedPos,
Cigar cigar,
const char *  referenceName,
const GenomeSequence refSequence,
std::string &  updatedSeq 
)
static

Gets the sequence converting '=' to the appropriate base using the reference.

Parameters
currentSeqsequence that should be converted
seq0BasedPos0 based start position of currentSeq on the reference.
cigarcigar string for currentSeq (used for determining how the sequence aligns to the reference)
referenceNamereference name associated with this sequence
refSequencereference sequence object
updatedSeqreturn parameter that this method sets to the current sequence, replacing any '=' with the base from the reference.

Definition at line 296 of file SamQuerySeqWithRefHelper.cpp.

302{
303 updatedSeq = currentSeq;
304
305 int32_t seqLength = updatedSeq.length();
306 int32_t queryIndex = 0;
307
308 uint32_t startOfReadOnRefIndex =
309 refSequence.getGenomePosition(referenceName);
310
311 if(startOfReadOnRefIndex == INVALID_GENOME_INDEX)
312 {
313 // This reference name was not found in the reference file, so just
314 // return.
315 return;
316 }
317 startOfReadOnRefIndex += seq0BasedPos;
318
319 // Loop until the entire sequence has been updated.
320 while(queryIndex < seqLength)
321 {
322 // Still more bases, look for matches.
323
324 // Get the reference offset for this read position.
325 int32_t refOffset = cigar.getRefOffset(queryIndex);
326 if(refOffset != Cigar::INDEX_NA)
327 {
328 // Both the reference and the read have a base, so get the bases.
329 char readBase = currentSeq[queryIndex];
330 char refBase = refSequence[startOfReadOnRefIndex + refOffset];
331
332 // If the bases are equal, set the sequence to the reference
333 // base. (Skips the check for ambiguous to catch a case where
334 // ambiguous had been converted to a '=', and if both are ambiguous,
335 // it will still be set to ambiguous.)
336 if(BaseUtilities::areEqual(readBase, refBase))
337 {
338 // Match.
339 updatedSeq[queryIndex] = refBase;
340 }
341 }
342
343 // Increment the query index to the next position.
344 ++queryIndex;
345 continue;
346 }
347}

References BaseUtilities::areEqual(), GenomeSequence::getGenomePosition(), Cigar::getRefOffset(), and Cigar::INDEX_NA.

Referenced by SamRecord::getSequence(), and SamRecord::getSequence().


The documentation for this class was generated from the following files: