18#ifndef __SAM_RECORD_H__
19#define __SAM_RECORD_H__
23#include "GenomeSequence.h"
26#include "MathVector.h"
27#include "StringArray.h"
29#include "SamFileHeader.h"
30#include "CigarRoller.h"
// Fixed-width fields of the BAM record structure (the enclosing struct
// header is outside this excerpt).
// NOTE(review): how each field maps onto the public getters is inferred
// from the names only — confirm against the accessor definitions.
// Reference sequence id of the record.
37 int32_t myReferenceID;
// Three bit-fields of 8, 8 and 16 bits (32 bits in total): read-name
// length, mapping quality, and bin.
39 uint32_t myReadNameLength : 8, myMapQuality : 8, myBin : 16;
// Two 16-bit bit-fields: CIGAR length and the bitwise flag.
40 uint32_t myCigarLength : 16, myFlag : 16;
// Mate reference id and mate position.
// NOTE(review): presumably the position is stored 0-based (BAM convention) — confirm.
42 int32_t myMateReferenceID;
43 int32_t myMatePosition;
120 const char* referenceName);
164 const char* mateReferenceName);
// Add the specified integer tag to the record.
//   tag:   name of the tag to add.
//   value: 32-bit signed integer value to store for the tag.
// NOTE(review): the bool return presumably signals success/failure — confirm in the definition.
234 bool addIntTag(
const char* tag, int32_t value);
// Add the specified tag, vtype and value to the record.
//   tag:   name of the tag to add.
//   vtype: single-character type code for the value.
//   value: the value, passed as a C string.
// NOTE(review): the bool return presumably signals success/failure — confirm in the definition.
244 bool addTag(
const char* tag,
char vtype,
const char* value);
// Remove a tag.
//   tag:  name of the tag to remove.
//   type: single-character type code of the tag to remove.
// NOTE(review): return value when the tag is absent is not visible here — confirm.
254 bool rmTag(
const char* tag,
char type);
// Remove tags.
//   tags: C string describing the tags to remove.
// NOTE(review): the expected format of `tags` (separator, whether types are
// included) is not visible in this excerpt — confirm in the definition.
264 bool rmTags(
const char* tags);
// Check if the specified tag contains a value of the specified type.
//   tag:  name of the tag to look up.
//   type: single-character type code to test for.
620 bool checkTag(
const char * tag,
char type);
// Pack a tag's two characters and its type into a single int key:
// ch1 is added as-is, ch2 is shifted left by 8 bits, and the result of
// getKeyType(type) is shifted left by 16 bits.
// NOTE(review): ch1/ch2 are plain char; where char is signed, a negative
// value would be sign-extended before the shift/add. Presumably tag
// characters are always ASCII — confirm.
629 static int MAKEKEY(
char ch1,
char ch2,
char type)
630 {
return (getKeyType(type) << 16) + (ch2 << 8) + ch1; }
632 static char getKeyType(
char type)
652 static inline int getNumericTagTypeSize(
char type)
// NOTE(review): presumably (re)allocates the underlying record buffer to
// hold `size` — the unit of `size` and the meaning of the bool return are
// not visible in this excerpt; confirm in the definition.
679 bool allocateRecordStructure(
int size);
// Untyped pointer accessors addressed by an int offset (definitions are
// outside this excerpt).
// NOTE(review): presumably these return pointers to stored tag values and
// `offset` indexes the per-type storage — confirm against the definitions.
681 void* getStringPtr(
int offset);
// This overload additionally takes a `vtype` reference parameter,
// presumably used to report the integer's type code — confirm.
682 void* getIntegerPtr(
int offset,
char& vtype);
683 void* getFloatPtr(
int offset);
// Helpers for converting between the record buffer and the parsed fields
// (declarations only; definitions are outside this excerpt).
// NOTE(review): the descriptions below are inferred from the names — confirm.
// Presumably fills the sequence and quality fields from the record buffer.
693 void setSequenceAndQualityFromBuffer();
// Presumably parse the CIGAR from its binary (BAM) form and from its
// string (SAM) form respectively; the bool return meaning is not visible here.
702 bool parseCigarBinary();
706 bool parseCigarString();
// Presumably read the tags out of the buffer / write the tags into the buffer.
710 bool setTagsFromBuffer();
714 bool setTagsInBuffer();
// Const helpers that take an int key and write their result through an
// output parameter (`type` by reference, `tag` through a char pointer).
// NOTE(review): presumably these decode keys built by MAKEKEY; the required
// size of the `tag` buffer is not visible here — confirm in the definitions.
718 void getTypeFromKey(
int key,
char& type)
const;
719 void getTag(
int key,
char* tag)
const;
// Reference-returning accessors addressed by an int offset.
// NOTE(review): presumably `offset` indexes the `intType` and `floats`
// vectors declared in this class — confirm; bounds handling is not visible here.
// Returns a const reference to a char type code; callable on const objects.
723 const char & getIntegerType(
int offset)
const;
// Returns a mutable reference to a float.
724 float & getFloat(
int offset);
// Index-based convenience overload: forwards intType[index] and
// integers[index] to the (type, value, strVal) overload declared below.
// (The braces of this inline body are missing from this excerpt, and the
// `integers` member it reads is declared outside it.)
// No bounds check on `index` is visible here.
728 inline void appendIntArrayValue(
int index,
String& strVal)
const
730 appendIntArrayValue(intType[index], integers[index], strVal);
// Overload taking the type code and value directly; definition is outside
// this excerpt.
// NOTE(review): presumably appends a textual form of `value` to `strVal` — confirm.
733 void appendIntArrayValue(
char type,
int value,
String& strVal)
const;
// Helpers that convert between a String and a raw byte buffer
// (declarations only; definitions are outside this excerpt).
// NOTE(review): presumably these handle 'B' (array) tags, with the int
// return being a byte count — confirm in the definitions.
// Takes the tag value as a String and returns an int.
735 int getBtagBufferSize(
String& tagStr);
// Takes the tag value as a String plus a destination char pointer.
736 int setBtagBuffer(
String& tagStr,
char* extraPtr);
// Takes a source byte buffer and a String to receive the result.
737 int getStringFromBtagBuffer(
unsigned char* buffer,
String& tagStr);
// Class-level defaults.
// NOTE(review): presumably these are the values a reset record falls back
// to — confirm against resetRecord().
// NOTE(review): 40 presumably equals the fixed-size portion of the record
// plus DEFAULT_READ_NAME_LENGTH (8) — confirm.
739 static const int DEFAULT_BLOCK_SIZE = 40;
// NOTE(review): presumably the bin assigned to a record with no position — confirm.
740 static const int DEFAULT_BIN = 4680;
741 static const int DEFAULT_READ_NAME_LENGTH = 8;
// Declared here only; their values are defined outside this excerpt.
742 static const char* DEFAULT_READ_NAME;
743 static const char* FIELD_ABSENT_STRING;
// Temporary buffer of 32-bit CIGAR values, with its allocated size and the
// length currently in use.
// NOTE(review): ownership and who frees this raw pointer is not visible
// here — confirm in the constructor/destructor.
750 uint32_t* myCigarTempBuffer;
753 int myCigarTempBufferAllocatedSize;
756 int myCigarTempBufferLength;
// State flags.
// NOTE(review): presumably these track whether the buffer and the parsed
// fields/tags are in sync and which direction needs refreshing — confirm.
762 bool myIsBufferSynced;
765 bool myNeedToSetTagsFromBuffer;
770 bool myNeedToSetTagsInBuffer;
// Name of the mate's reference sequence.
777 String myMateReferenceName;
// Two stored forms of the sequence.
// NOTE(review): presumably one with reference-matching bases shown as '='
// and one with actual bases — confirm.
782 std::string mySeqWithEq;
783 std::string mySeqWithoutEq;
// Alignment length and unclipped start/end offsets.
// NOTE(review): presumably cached values derived from the CIGAR — confirm.
786 int32_t myAlignmentLength;
788 int32_t myUnclippedStartOffset;
789 int32_t myUnclippedEndOffset;
// Per-tag storage: type codes for integer values (read by the index-based
// appendIntArrayValue overload as intType[index]) and float values.
801 std::vector<char> intType;
802 std::vector<float> floats;
// One validity flag per section of the record buffer.
// NOTE(review): presumably true when that section of the buffer matches the
// parsed field — confirm.
807 bool myIsReadNameBufferValid;
808 bool myIsCigarBufferValid;
809 bool myIsSequenceBufferValid;
810 bool myIsQualityBufferValid;
811 bool myIsTagsBufferValid;
// Raw pointers to the packed sequence and quality bytes.
// NOTE(review): presumably these point into the record buffer rather than
// owning memory — confirm.
814 unsigned char* myPackedSequence;
815 unsigned char* myPackedQuality;
// Non-static members despite the constant-style names.
// NOTE(review): presumably the sentinels returned by the reference-returning
// tag getters when a tag is missing — confirm.
831 String NOT_FOUND_TAG_STRING;
832 int NOT_FOUND_TAG_INT;
// Warning counters shared by all instances: a fixed cap of 5 and a running
// count (static, defined outside this excerpt).
834 static const int myMaxWarns = 5;
835 static int myNumWarns;
The purpose of this class is to provide accessors for setting, updating, modifying the CIGAR object....
This class represents the CIGAR without any methods to set the cigar (see CigarRoller for that).
HandlingType
This specifies how this class should respond to errors.
Create/Access/Modify/Load Genome Sequences stored as binary mapped files.
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record.
int32_t getBlockSize()
Get the block size of the record (BAM format).
uint16_t getCigarLength()
Get the length of the BAM formatted CIGAR.
const char * getReferenceName()
Get the reference sequence name (RNAME) of the record.
SequenceTranslation
Enum containing the settings on how to translate the sequence if a reference is available.
@ NONE
Leave the sequence as is.
@ BASES
Translate '=' to the actual base.
@ EQUAL
Translate bases that match the reference to '='.
bool setReadName(const char *readName)
Set QNAME to the passed in name.
int32_t getInsertSize()
Get the inferred insert size of the read pair (ISIZE) or observed template length (TLEN).
bool checkString(const char *tag)
Check if the specified tag contains a string.
int32_t get0BasedMatePosition()
Get the 0-based(BAM) leftmost mate/next fragment's position.
int32_t get1BasedPosition()
Get the 1-based(SAM) leftmost position (POS) of the record.
void clearTags()
Clear the tags in this record.
bool addIntTag(const char *tag, int32_t value)
Add the specified integer tag to the record.
int32_t getReferenceID()
Get the reference sequence id of the record (BAM format rid).
bool getTagsString(const char *tags, String &returnString, char delim='\t')
Get the string representation of the tags from the record, formatted as TAG:TYPE:VALUE<delim>TAG:TYPE...
GenomeSequence * getReference()
Returns a pointer to the genome sequence object associated with this record if it was set (NULL if it...
int32_t getAlignmentLength()
Returns the length of the clipped sequence, returning 0 if the cigar is '*'.
int & getInteger(const char *tag)
Get the integer value for the specified tag, DEPRECATED, use getIntegerTag that returns a bool.
bool setInsertSize(int32_t insertSize)
Sets the inferred insert size (ISIZE)/observed template length (TLEN).
int32_t get1BasedAlignmentEnd()
Returns the 1-based inclusive rightmost position of the clipped sequence.
uint32_t getTagLength()
Returns the length of the BAM formatted tags.
SamRecord()
Default Constructor.
static bool isIntegerType(char vtype)
Returns whether or not the specified vtype is an integer type.
bool rmTag(const char *tag, char type)
Remove a tag.
bool setMateReferenceName(SamFileHeader &header, const char *mateReferenceName)
Set the mate/next fragment's reference sequence name (RNEXT) to the specified name,...
uint8_t getReadNameLength()
Get the length of the readname (QNAME) including the null.
bool checkFloat(const char *tag)
Check if the specified tag contains a float.
Cigar * getCigarInfo()
Returns a pointer to the Cigar object associated with this record.
bool getFloatTag(const char *tag, float &tagVal)
Get the float value for the specified tag.
SamStatus::Status writeRecordBuffer(IFILE filePtr)
Write the record as a BAM into the specified already opened file.
const char * getMateReferenceNameOrEqual()
Get the mate/next fragment's reference sequence name (RNEXT), returning "=" if it is the same as the ...
bool setMapQuality(uint8_t mapQuality)
Set the mapping quality (MAPQ).
static bool isFloatType(char vtype)
Returns whether or not the specified vtype is a float type.
SamStatus::Status setBuffer(const char *fromBuffer, uint32_t fromBufferSize, SamFileHeader &header)
Sets the SamRecord to contain the information in the BAM formatted fromBuffer.
int32_t get1BasedUnclippedStart()
Returns the 1-based inclusive left-most position adjusted for clipped bases.
bool addTag(const char *tag, char vtype, const char *value)
Add the specified tag,vtype,value to the record.
uint16_t getBin()
Get the BAM bin for the record.
bool isValid(SamFileHeader &header)
Returns whether or not the record is valid, setting the status to indicate success or failure.
int32_t getMateReferenceID()
Get the mate reference id of the record (BAM format: mate_rid/next_refID).
bool getFields(bamRecordStruct &recStruct, String &readName, String &cigar, String &sequence, String &quality)
Returns the values of all fields except the tags.
bool set0BasedMatePosition(int32_t matePosition)
Set the mate/next fragment's leftmost position using the specified 0-based (BAM format) value.
void resetRecord()
Reset the fields of the record to a default value.
bool setFlag(uint16_t flag)
Set the bitwise FLAG to the specified value.
bool set1BasedPosition(int32_t position)
Set the leftmost position (POS) using the specified 1-based (SAM format) value.
SamStatus::Status setBufferFromFile(IFILE filePtr, SamFileHeader &header)
Read the BAM record from a file.
uint16_t getFlag()
Get the flag (FLAG).
const void * getRecordBuffer()
Get a const pointer to the buffer that contains the BAM representation of the record.
void setSequenceTranslation(SequenceTranslation translation)
Set the type of sequence translation to use when getting the sequence.
bool checkInteger(const char *tag)
Check if the specified tag contains an integer.
int32_t get1BasedMatePosition()
Get the 1-based(SAM) leftmost mate/next fragment's position (PNEXT).
int32_t get0BasedUnclippedEnd()
Returns the 0-based inclusive right-most position adjusted for clipped bases.
bool shiftIndelsLeft()
Shift the indels (if any) to the left by updating the CIGAR.
int * getIntegerTag(const char *tag)
Get the integer value for the specified tag, DEPRECATED, use one that returns a bool (success/failure...
const SamStatus & getStatus()
Returns the status associated with the last method that sets the status.
static bool isCharType(char vtype)
Returns whether or not the specified vtype is a char type.
bool setCigar(const char *cigar)
Set the CIGAR to the specified SAM formatted cigar string.
int32_t get1BasedUnclippedEnd()
Returns the 1-based inclusive right-most position adjusted for clipped bases.
uint32_t getNumOverlaps(int32_t start, int32_t end)
Return the number of bases in this read that overlap the passed in region.
const char * getMateReferenceName()
Get the mate/next fragment's reference sequence name (RNEXT).
bool checkTag(const char *tag, char type)
Check if the specified tag contains a value of the specified vtype.
bool getNextSamTag(char *tag, char &vtype, void **value)
Get the next tag from the record.
void setReference(GenomeSequence *reference)
Set the reference to the specified genome sequence object.
bool setSequence(const char *seq)
Sets the sequence (SEQ) to the specified SAM formatted sequence string.
int32_t get0BasedUnclippedStart()
Returns the 0-based inclusive left-most position adjusted for clipped bases.
int32_t getReadLength()
Get the length of the read.
int32_t get0BasedAlignmentEnd()
Returns the 0-based inclusive rightmost position of the clipped sequence.
const String * getStringTag(const char *tag)
Get the string value for the specified tag.
bool set1BasedMatePosition(int32_t matePosition)
Set the mate/next fragment's leftmost position (PNEXT) using the specified 1-based (SAM format) value...
int32_t get0BasedPosition()
Get the 0-based(BAM) leftmost position of the record.
const char * getCigar()
Returns the SAM formatted CIGAR string.
uint8_t getMapQuality()
Get the mapping quality (MAPQ) of the record.
const String & getString(const char *tag)
Get the string value for the specified tag.
bool set0BasedPosition(int32_t position)
Set the leftmost position using the specified 0-based (BAM format) value.
const char * getReadName()
Returns the SAM formatted Read Name (QNAME).
void resetTagIter()
Reset the tag iterator to the beginning of the tags.
bool setQuality(const char *quality)
Sets the quality (QUAL) to the specified SAM formatted quality string.
bool setReferenceName(SamFileHeader &header, const char *referenceName)
Set the reference sequence name (RNAME) to the specified name, using the header to determine the refe...
const char * getQuality()
Returns the SAM formatted quality string (QUAL).
const char * getSequence()
Returns the SAM formatted sequence string (SEQ), translating the base as specified by setSequenceTran...
bool rmTags(const char *tags)
Remove tags.
static bool isStringType(char vtype)
Returns whether or not the specified vtype is a string type.
This class is used to track the status results of some methods in the BAM classes.
Status
Return value enum for StatGenFile methods.
Structure of a BAM record.