libStatGen Software 1
Loading...
Searching...
No Matches
SamQuerySeqWithRefHelper.h
1/*
2 * Copyright (C) 2010 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#ifndef __SAM_QUERY_SEQ_WITH_REF_HELPER_H__
19#define __SAM_QUERY_SEQ_WITH_REF_HELPER_H__
20
21#include <stdint.h>
22
23#include "SamRecord.h"
24#include "GenomeSequence.h"
25
26/// This class contains the match/mismatch information
27/// between the reference and a read for a single base.
29{
30public:
31 /// More types can be added later as needed.
32 enum Type {UNKNOWN, MATCH, MISMATCH};
33
36
37
38 /// Get the type (match/mismatch/unknown) for this object.
39 Type getType();
40
41 /// Get the query index for this object.
42 int32_t getQueryIndex();
43
44 /// Set the type (match/mismatch/unknown) for this object.
45 void setType(Type newType);
46
47 /// Set the query index for this object.
48 void setQueryIndex(int32_t queryIndex);
49
50private:
51 Type myType;
52 int32_t myQueryIndex;
53};
54
55/// Iterates through the query and compares it with the reference.
56/// NOTE: References to the GenomeSequence and SamRecord are stored; the objects
57/// are not copied, so they must remain valid as long as this class is used.
59{
60public:
62 bool forward = true);
64
65 /// Reset to start at the beginning of the record.
66 /// This will re-read values from SamRecord, so can be used if it has
67 /// changed to contain information for a new record.
68 /// \param forward true means to start from the beginning and go to the end;
69 /// false means to start from the end and go to the beginning.
70 /// \return true if successfully reset; false if failed to read the Cigar.
71 bool reset(bool forward = true);
72
73 /// Returns information for the next position where the query and the
74 /// reference match or mismatch. To be a match or mismatch, both the query
75 /// and reference must have a base that is not 'N'.
76 /// This means:
77 /// insertions and deletions are not mismatches or matches.
78 /// 'N' bases are not matches or mismatches
79 /// \param matchMismatchInfo return parameter with the information about
80 /// the matching/mismatching base.
81 /// \return true if there was another match/mismatch
82 /// (matchMismatchInfo was set); false if not.
83 bool getNextMatchMismatch(SamSingleBaseMatchInfo& matchMismatchInfo);
84
85private:
86
88
89 void nextIndex();
90
91 SamRecord& myRecord;
92 GenomeSequence& myRefSequence;
93 Cigar* myCigar;
94 uint32_t myStartOfReadOnRefIndex;
95 int32_t myQueryIndex;
96 bool myForward;
97};
98
99
100/// Contains methods for converting between the query sequence and reference.
102{
103public:
104 /// Gets the sequence with '=' in any position where the sequence matches
105 /// the reference.
106 /// NOTE: 'N' in both the sequence and the reference is not considered a
107 /// match.
108 /// \param currentSeq sequence that should be converted
109 /// \param seq0BasedPos 0 based start position of currentSeq on the reference.
110 /// \param cigar cigar string for currentSeq (used for determining how the sequence aligns to the reference)
111 /// \param referenceName reference name associated with this sequence
112 /// \param refSequence reference sequence object
113 /// \param updatedSeq return parameter that this method sets to the
114 /// current sequence, replacing any matches to the reference with '='.
115 static void seqWithEquals(const char* currentSeq,
116 int32_t seq0BasedPos,
117 Cigar& cigar,
118 const char* referenceName,
119 const GenomeSequence& refSequence,
120 std::string& updatedSeq);
121
122 /// Gets the sequence converting '=' to the appropriate base using the
123 /// reference.
124 /// \param currentSeq sequence that should be converted
125 /// \param seq0BasedPos 0 based start position of currentSeq on the reference.
126 /// \param cigar cigar string for currentSeq (used for determining how the sequence aligns to the reference)
127 /// \param referenceName reference name associated with this sequence
128 /// \param refSequence reference sequence object
129 /// \param updatedSeq return parameter that this method sets to the
130 /// current sequence, replacing any '=' with the base from the reference.
131 static void seqWithoutEquals(const char* currentSeq,
132 int32_t seq0BasedPos,
133 Cigar& cigar,
134 const char* referenceName,
135 const GenomeSequence& refSequence,
136 std::string& updatedSeq);
137
138private:
140};
141#endif
This class represents the CIGAR without any methods to set the cigar (see CigarRoller for that).
Definition Cigar.h:84
Create/Access/Modify/Load Genome Sequences stored as binary mapped files.
Iterates through the query and compares it with the reference.
bool reset(bool forward=true)
Reset to start at the beginning of the record.
bool getNextMatchMismatch(SamSingleBaseMatchInfo &matchMismatchInfo)
Returns information for the next position where the query and the reference match or mismatch.
Contains methods for converting between the query sequence and reference.
static void seqWithoutEquals(const char *currentSeq, int32_t seq0BasedPos, Cigar &cigar, const char *referenceName, const GenomeSequence &refSequence, std::string &updatedSeq)
Gets the sequence converting '=' to the appropriate base using the reference.
static void seqWithEquals(const char *currentSeq, int32_t seq0BasedPos, Cigar &cigar, const char *referenceName, const GenomeSequence &refSequence, std::string &updatedSeq)
Gets the sequence with '=' in any position where the sequence matches the reference.
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record.
Definition SamRecord.h:52
This class contains the match/mismatch information between the reference and a read for a single base...
void setQueryIndex(int32_t queryIndex)
Set the query index for this object.
void setType(Type newType)
Set the type (match/mismatch/unknown) for this object.
Type
More types can be added later as needed.
int32_t getQueryIndex()
Get the query index for this object.
Type getType()
Get the type (match/mismatch/unknown) for this object.