libStatGen Software 1
Loading...
Searching...
No Matches
GlfRecord.h
1/*
2 * Copyright (C) 2010 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#ifndef __GLF_RECORD_H__
19#define __GLF_RECORD_H__
20
21#include <map>
22#include <stdint.h>
23
24#include "InputFile.h"
25#include "CharBuffer.h"
26
27/// This class allows a user to easily get/set the fields in a GLF record.
29{
30public:
31 /// Constructor
32 GlfRecord();
33
34 /// Destructor
35 ~GlfRecord();
36
37// // Copy Constructor
38// GlfRecord(const GlfRecord& record);
39
40// // Overload operator = to copy the passed in record into this record.
41// GlfRecord & operator = (const GlfRecord& record);
42
43// // Copy the passed in record into this record.
44// bool copy(const GlfRecord& record);
45
46 /// Clear this record back to the default setting.
47 void reset();
48
49 /// Read the record from the specified file (file MUST be in
50 /// the correct position for reading a record).
51 /// \param filePtr file to read from that is in the correct position.
52 /// \return true if the record was successfully read from the file (even
53 /// if it is an endMarker), false if it was not successfully read.
54 bool read(IFILE filePtr);
55
56 /// Write the record to the specified file.
57 /// \param filePtr file to write to that is in the correct position.
58 /// \return true if the record was successfully written to the
59 /// file, false if not.
60 bool write(IFILE filePtr) const;
61
62 /// Print the reference section in a readable format.
63 void print() const;
64
65 /// @name Generic Accessors for Record Types 1 & 2
66 //@{
67 /// Set the record type and reference base.
68 /// \param rtypeRef record type & reference base. Formatted as:
69 /// record_type<<4|numeric_ref_base.
70 /// \return true if the record type and reference base were successfully
71 /// set, false if not.
72 bool setRtypeRef(uint8_t rtypeRef);
73
74 /// Set the record type.
75 /// \param recType record type: 1 - simple likelihood record,
76 /// 2 - indel likelihood record, 0 - end marker
77 /// \return true if the record type was successfully set, false if not.
78 bool setRecordType(uint8_t recType);
79
80 /// Set the reference base from an integer value.
81 /// \param refBase integer representation of the reference base.
82 /// \anchor BaseCharacterIntMap
83 /// <table>
84 /// <tr><th>Int Value</th><td>0</td><td>1</td><td>2</td><td>3</td><td>4</td><td>5</td><td>6</td><td>7</td><td>8</td><td>9</td><td>10</td><td>11</td><td>12</td><td>13</td><td>14</td><td>15</td></tr>
85 /// <tr><th>Character Base</th><td>X</td><td>A</td><td>C</td><td>M</td><td>G</td><td>R</td><td>S</td><td>V</td><td>T</td><td>W</td><td>Y</td><td>H</td><td>K</td><td>D</td><td>B</td><td>N</td></tr>
86 /// </table>
87 /// \return true if the reference base was successfully set, false if not.
88 bool setRefBaseInt(uint8_t refBase);
89
90 // TODO bool setRefBaseChar(char refBase);
91
92 /// Set the offset from the precedent record.
93 /// 0-based coordinate of the record minus the coordinate of the
94 /// precedent record. For the first record in a reference sequence,
95 /// the previous coordinate is 0.
96 /// For insertions between x & x+1, the coordinate is x.
97 /// For deletions between x & y, the coordinate is x.
98 /// \param offset offset from the precedent record.
99 /// \return true if successfully set, false if not.
100 bool setOffset(uint32_t offset);
101
102 /// Set the minimum likelihood and the read depth.
103 /// \param minDepth minimum likelihood and read depth. Formatted as:
104 /// min_lk<<24|read_depth. (min_lk capped at 255)
105 /// \return true if successfully set, false if not.
106 bool setMinDepth(uint32_t minDepth);
107
108 /// Set the minimum likelihood.
109 /// \param minLk minimum likelihood (capped at 255).
110 /// \return true if successfully set, false if not.
111 bool setMinLk(uint8_t minLk);
112
113 /// Set the read depth.
114 /// \param readDepth read depth.
115 /// \return true if successfully set, false if not.
116 bool setReadDepth(uint32_t readDepth);
117
118 /// Set the RMS of mapping qualities of reads covering the site.
119 /// \param rmsMapQ RMS of mapping qualities
120 /// \return true if successfully set, false if not.
121 bool setRmsMapQ(uint8_t rmsMapQ);
122
123 /// Return the record type, extracted from the packed record type/reference base byte.
124 /// \return record type for this record: 0 - endMarker,
125 /// 1 - simple likelihood, 2 - indel likelihood
126 inline int getRecordType() const
127 {
128 return(myRecTypeRefBase >> REC_TYPE_SHIFT); // shift away the reference base bits that sit below REC_TYPE_SHIFT
129 }
130
131 /// Return the reference base as an integer, extracted from the packed record type/reference base byte.
132 /// \return integer representation of the reference base.
133 /// See: \ref BaseCharacterIntMap
134 inline int getRefBase() const
135 {
136 return(myRecTypeRefBase & REF_BASE_MASK); // keep only the bits selected by REF_BASE_MASK, dropping the record type
137 }
138
139 /// Return the reference base as a character.
140 /// \return character representation of the reference base.
141 char getRefBaseChar() const;
142
143 /// Return the offset from the precedent record.
144 /// \return offset from the precedent record.
145 uint32_t getOffset() const;
146
147 /// Return the minimum likelihood and read depth. Formatted as:
148 /// min_lk<<24|read_depth. (min_lk capped at 255)
149 /// \return minimum likelihood and read depth
150 uint32_t getMinDepth() const;
151
152 /// Return the minimum likelihood
153 /// \return minimum likelihood
154 uint8_t getMinLk() const;
155
156 /// Return the read depth.
157 /// \return read depth
158 uint32_t getReadDepth() const;
159
160 /// Return the RMS of mapping qualities of reads covering the site.
161 /// \return RMS of mapping qualities.
162 uint8_t getRmsMapQ() const;
163
164 //@}
165
166 /// @name Record Type 1 Accessors
167 /// Record Type 1: Simple Likelihood Record
168 //@{
169 //bool setType1(all fields for type 1);
170
171 /// Set the likelihood for the specified genotype.
172 /// Throws an exception if index is out of range.
173 /// \param index index for the genotype for which the likelihood is
174 /// being set.
175 /// \anchor GenotypeIndexTable
176 /// <table>
177 /// <tr><th>Index</th><td>0</td><td>1</td><td>2</td><td>3</td><td>4</td><td>5</td><td>6</td><td>7</td><td>8</td><td>9</td></tr>
178 /// <tr><th>Genotype</th><td>AA</td><td>AC</td><td>AG</td><td>AT</td><td>CC</td><td>CG</td><td>CT</td><td>GG</td><td>GT</td><td>TT</td></tr>
179 /// </table>
180 /// \param value likelihood for the genotype at the specified index.
181 /// \return true if successfully set, false if not.
182 bool setLk(int index, uint8_t value);
183
184 //bool getType1(all fields for type 1);
185
186 /// Get the likelihood for the specified genotype index.
187 /// Throws an exception if index is out of range.
188 /// \param index index of the genotype for which the likelihood should
189 /// be returned. See: \ref GenotypeIndexTable
190 /// \return likelihood of the specified index.
191 uint8_t getLk(int index);
192 //@}
193
194 /// @name Record Type 2 Accessors
195 /// Record Type2: Indel Likelihood Record
196 //@{
197// bool setType2(all fields for type 2);
198
199 /// Set the likelihood of the first homozygous indel allele.
200 /// \param lk likelihood of the 1st homozygous indel allele (capped at 255)
201 /// \return true if successfully set, false if not.
202 bool setLkHom1(uint8_t lk);
203
204 /// Set the likelihood of the 2nd homozygous indel allele.
205 /// \param lk likelihood of the 2nd homozygous indel allele (capped at 255)
206 /// \return true if successfully set, false if not.
207 bool setLkHom2(uint8_t lk);
208
209 /// Set the likelihood of a heterozygote.
210 /// \param lk likelihood of a heterozygote (capped at 255)
211 /// \return true if successfully set, false if not.
212 bool setLkHet(uint8_t lk);
213
214 /// Set the sequence of the first indel allele if the
215 /// first indel is an insertion.
216 /// \param indelSeq sequence of the first indel allele (insertion).
217 /// \return true if successfully set, false if not.
218 bool setInsertionIndel1(const std::string& indelSeq);
219
220 /// Set the sequence of the first indel allele if the
221 /// first indel is a deletion.
222 /// \param indelSeq sequence of the first indel allele (deletion).
223 /// \return true if successfully set, false if not.
224 bool setDeletionIndel1(const std::string& indelSeq);
225
226 /// Set the sequence of the 2nd indel allele if the
227 /// 2nd indel is an insertion.
228 /// \param indelSeq sequence of the 2nd indel allele (insertion).
229 /// \return true if successfully set, false if not.
230 bool setInsertionIndel2(const std::string& indelSeq);
231
232 /// Set the sequence of the 2nd indel allele if the
233 /// 2nd indel is a deletion.
234 /// \param indelSeq sequence of the 2nd indel allele (deletion).
235 /// \return true if successfully set, false if not.
236 bool setDeletionIndel2(const std::string& indelSeq);
237
238 // bool setType2(all fields for type 2);
239
240 /// Return the likelihood of the 1st homozygous indel allele.
241 /// \return likelihood of the 1st homozygous indel allele.
242 uint8_t getLkHom1();
243
244 /// Return the likelihood of the 2nd homozygous indel allele.
245 /// \return likelihood of the 2nd homozygous indel allele.
246 uint8_t getLkHom2();
247
248 /// Return the likelihood of a heterozygote.
249 /// \return likelihood of a heterozygote.
250 uint8_t getLkHet();
251
252 /// Get the sequence and length (+:ins, -:del) of the 1st indel allele.
253 /// \param indelSeq string to set with the sequence of the 1st indel allele
254 /// \return length of the 1st indel allele
255 /// (positive=insertion; negative=deletion; 0=no-indel)
256 int16_t getIndel1(std::string& indelSeq);
257
258 /// Get the sequence and length (+:ins, -:del) of the 2nd indel allele.
259 /// \param indelSeq string to set with the sequence of the 2nd indel allele
260 /// \return length of the 2nd indel allele
261 /// (positive=insertion; negative=deletion; 0=no-indel)
262 int16_t getIndel2(std::string& indelSeq);
263 //@}
264
265private:
266 // Read a record of record type 1.
267 void readType1(IFILE filePtr);
268
269 // Read a record of record type 2.
270 void readType2(IFILE filePtr);
271
272
273 // Write the rtyperef field.
274 void writeRtypeRef(IFILE filePtr) const;
275
276
277 // Write a record of record type 1.
278 void writeType1(IFILE filePtr) const;
279
280 // Write a record of record type 2.
281 void writeType2(IFILE filePtr) const;
282
283 // Contains record_type and ref_base, packed into one byte as record_type<<4|numeric_ref_base.
284 uint8_t myRecTypeRefBase;
285
286 static const uint8_t REC_TYPE_SHIFT = 4; // right-shift that moves the record type down to bit 0
287 static const uint8_t REF_BASE_MASK = 0xF; // low 4 bits of myRecTypeRefBase: the reference base
288 static const uint8_t REC_TYPE_MASK = 0xF0; // high 4 bits of myRecTypeRefBase: the record type
289
290 static const uint32_t MIN_LK_SHIFT = 24; // min_depth is formatted as min_lk<<24|read_depth
291 static const uint32_t READ_DEPTH_MASK = 0xFFFFFF; // low 24 bits of min_depth: the read depth
292 static const uint32_t MIN_LK_MASK = 0xFF000000; // high 8 bits of min_depth: the minimum likelihood
293
294 static const char REF_BASE_MAX = 15; // largest integer reference base value (4-bit field, 0-15)
295 static std::string REF_BASE_CHAR; // NOTE(review): presumably the integer-to-character base lookup; defined outside this header - confirm
296
297 static const int NUM_REC1_LIKELIHOOD = 10; // number of genotype likelihoods in a type 1 record (genotype indices 0-9)
298
299 struct // Fields of a record of type 1 (simple likelihood record).
300 {
301 uint32_t offset; // offset from the precedent record
302 uint32_t min_depth; // formatted as min_lk<<24|read_depth
303 uint8_t rmsMapQ; // RMS of mapping qualities of reads covering the site
304 uint8_t lk[GlfRecord::NUM_REC1_LIKELIHOOD]; // one likelihood per genotype index
305 } myRec1Base;
306
307 static const int REC1_BASE_SIZE = 19; // sum of the field sizes above: 4 + 4 + 1 + 10. NOTE(review): sizeof(myRec1Base) may be larger due to trailing padding - confirm how this is used for I/O
308
309 struct // Fixed-size fields of a record of type 2 (indel likelihood record).
310 {
311 uint32_t offset; // offset from the precedent record
312 uint32_t min_depth; // formatted as min_lk<<24|read_depth
313 uint8_t rmsMapQ; // RMS of mapping qualities of reads covering the site
314 uint8_t lkHom1; // likelihood of the 1st homozygous indel allele
315 uint8_t lkHom2; // likelihood of the 2nd homozygous indel allele
316 uint8_t lkHet; // likelihood of a heterozygote
317 int16_t indelLen1; // NOTE(review): presumably the signed length reported by getIndel1 (positive=insertion; negative=deletion; 0=no-indel) - confirm
318 int16_t indelLen2; // NOTE(review): presumably the signed length reported by getIndel2 - confirm
319 } myRec2Base;
320
321 // TODO rest of rec 2.
322 CharBuffer myIndelSeq1; // NOTE(review): presumably holds the sequence of the 1st indel allele - confirm
323 CharBuffer myIndelSeq2; // NOTE(review): presumably holds the sequence of the 2nd indel allele - confirm
324
325 static const int REC2_BASE_SIZE = 16; // sum of the myRec2Base field sizes: 4 + 4 + 1 + 1 + 1 + 1 + 2 + 2
326
327};
328
329#endif
This class allows a user to easily get/set the fields in a GLF record.
Definition GlfRecord.h:29
bool setLk(int index, uint8_t value)
Set the likelihood for the specified genotype.
void print() const
Print the reference section in a readable format.
bool setDeletionIndel2(const std::string &indelSeq)
Set the sequence of the 2nd indel allele if the 2nd indel is a deletion.
GlfRecord()
Constructor.
Definition GlfRecord.cpp:25
int getRecordType() const
Return the record type.
Definition GlfRecord.h:126
uint8_t getLkHet()
Return the likelihood of a heterozygote.
bool write(IFILE filePtr) const
Write the record to the specified file.
uint8_t getLkHom1()
Return the likelihood of the 1st homozygous indel allele.
char getRefBaseChar() const
Return the reference base as a character.
bool setLkHom1(uint8_t lk)
Set the likelihood of the first homozygous indel allele.
~GlfRecord()
Destructor.
Definition GlfRecord.cpp:31
int getRefBase() const
Return the reference base as an integer.
Definition GlfRecord.h:134
bool setDeletionIndel1(const std::string &indelSeq)
Set the sequence of the first indel allele if the first indel is a deletion.
bool setRmsMapQ(uint8_t rmsMapQ)
Set the RMS of mapping qualities of reads covering the site.
uint8_t getRmsMapQ() const
Return the RMS of mapping qualities of reads covering the site.
bool setInsertionIndel2(const std::string &indelSeq)
Set the sequence of the 2nd indel allele if the 2nd indel is an insertion.
uint32_t getOffset() const
Return the offset from the precedent record.
bool setRecordType(uint8_t recType)
Set the record type.
uint32_t getMinDepth() const
Return the minimum likelihood and read depth.
bool setRefBaseInt(uint8_t refBase)
Set the reference base from an integer value.
bool setReadDepth(uint32_t readDepth)
Set the read depth.
bool setLkHet(uint8_t lk)
Set the likelihood of a heterozygote.
uint8_t getMinLk() const
Return the minimum likelihood.
bool setRtypeRef(uint8_t rtypeRef)
Set the record type and reference base.
bool setLkHom2(uint8_t lk)
Set the likelihood of the 2nd homozygous indel allele.
int16_t getIndel1(std::string &indelSeq)
Get the sequence and length (+:ins, -:del) of the 1st indel allele.
int16_t getIndel2(std::string &indelSeq)
Get the sequence and length (+:ins, -:del) of the 2nd indel allele.
bool read(IFILE filePtr)
Read the record from the specified file (file MUST be in the correct position for reading a record).
Definition GlfRecord.cpp:65
uint32_t getReadDepth() const
Return the read depth.
uint8_t getLk(int index)
Get the likelihood for the specified genotype index.
bool setMinLk(uint8_t minLk)
Set the minimum likelihood.
void reset()
Clear this record back to the default setting.
Definition GlfRecord.cpp:38
uint8_t getLkHom2()
Return the likelihood of the 2nd homozygous indel allele.
bool setOffset(uint32_t offset)
Set the offset from the precedent record.
bool setInsertionIndel1(const std::string &indelSeq)
Set the sequence of the first indel allele if the first indel is an insertion.
bool setMinDepth(uint32_t minDepth)
Set the minimum likelihood and the read depth.
Class for easily reading/writing files without having to worry about file type (uncompressed,...
Definition InputFile.h:37