libStatGen Software 1
Loading...
Searching...
No Matches
SamValidation.h
1/*
2 * Copyright (C) 2010 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#ifndef __SAM_VALIDATION_H__
19#define __SAM_VALIDATION_H__
20
21#include "SamFile.h"
22#include <list>
23
24// On windows, ERROR and WARNING are pre-defined macros, so undefine them.
25#ifdef WARNING
26#undef WARNING
27#endif
28#ifdef ERROR
29#undef ERROR
30#endif
31
32/// The SamValidationError class describes a validation error that occurred,
33/// containing the error type, severity, and textual error message.
35{
36public:
37 /// Severity of the error.
39 {
40 WARNING, ///< Warning is used if it is just an invalid value.
41 ERROR ///< Error is used if parsing could not succeed.
42 };
43
44 /// Type of the error.
45 /// TODO: NOT ALL INVALID TYPES HAVE BEEN ADDED SINCE NOT ALL VALIDATION
46 /// IS COMPLETE YET
47 enum Type
48 {
49 INVALID_QNAME, ///< Invalid read/query name
50 INVALID_REF_ID, ///< Invalid reference id
51 INVALID_RNAME, ///< Invalid reference name
52 INVALID_POS, ///< Invalid position
53 INVALID_MAPQ, ///< Invalid mapping quality
54 INVALID_CIGAR, ///< Invalid CIGAR
55 INVALID_MRNM, ///< Invalid mate/next fragment reference name
56 INVALID_QUAL, ///< Invalid base quality
57 INVALID_TAG ///< Invalid tag
58 };
59
60 /// Get the string representing the specified type of validation error.
61 static const char* getTypeString(Type type);
62
63 /// Constructor that sets the type, severity, and message for the
64 /// validation error.
65 SamValidationError(Type type, Severity severity, std::string Message);
66
67 /// Return the type enum of this validation error object.
68 Type getType() const;
69
70 /// Return the severity enum of this validation error object.
71 Severity getSeverity() const;
72
73 /// Return the error message of this validation error object.
74 const char* getMessage() const;
75
76 /// Return the string representing this object's type of validation error.
77 const char* getTypeString() const;
78
79 /// Return the string representing this object's severity of validation
80 /// error.
81 const char* getSeverityString() const;
82
83 /// Get the error string representing this object's error.
84 void getErrorString(std::string& errorString) const;
85
86 /// Print a formatted output of the error to cerr.
87 void printError() const;
88
89private:
91
92 static const char* enumTypeString[];
93 static const char* enumSeverityString[];
94
95 Type myType;
96 Severity mySeverity;
97 std::string myMessage;
98
99};
100
101
102/// Stream output for a single validation error: writes the error string obtained from getErrorString() to the stream.
103inline std::ostream &operator << (std::ostream &stream,
104 const SamValidationError &error)
105{
106 std::string errorMessage;
107 error.getErrorString(errorMessage);
108 stream << errorMessage;
109 return stream;
110}
111
112
113/// The SamValidationErrors class is a container class that holds
114/// SamValidationError Objects, allowing a validation method to return all
115/// of the invalid errors rather than just one.
117{
118public:
119 /// Constructor.
121 /// Destructor
123
124 /// Remove all the errors from the container.
125 void clear();
126
127 /// Add the specified error to this container.
130 const char* newMessage);
131
132 /// Return the number of validation errors contained in this object.
133 unsigned int numErrors();
134
135 /// Return a pointer to the next error without removing it from the
136 /// container; returns null once all errors have been retrieved, until
137 /// resetErrorIter is called.
139
140 /// Reset the iterator to the beginning of the errors.
141 void resetErrorIter();
142
143 /// Append the error messages contained in this container to the passed
144 /// in string.
145 void getErrorString(std::string& errorString) const;
146
147private:
148 std::list<const SamValidationError*> myValidationErrors;
149 std::list<const SamValidationError*>::const_iterator myErrorIter;
150};
151
152
153/// Stream output for all validation failures: writes the error string filled in by the container's getErrorString() to the stream.
154inline std::ostream& operator << (std::ostream& stream,
155 const SamValidationErrors& errors)
156{
157 std::string errorString = "";
158 errors.getErrorString(errorString);
159 stream << errorString;
160 return stream;
161}
162
163
164/// The SamValidator class contains static methods for validating the SAM/BAM
165/// Record and each of its fields. The generic isValid method performs all of
166/// the other validations. The SamValidator methods return whether or not what
167/// is being validated is valid. True means it is valid, false means it is not.
168/// The specifics of the invalid value(s) are contained in the
169/// SamValidationErrors object that is passed in (by reference) to the method.
170/// The specific errors can be pulled out of that object.
171/// TODO: VALIDATION METHODS STILL NEED TO BE ADDED, and isValid does not yet
172/// validate all fields!!!
174{
175public:
176
177 /// Validates whether or not the specified SamRecord is valid, calling
178 /// all of the other validations.
179 /// TODO: more validation needs to be added.
180 /// \param samHeader header associated with the record to be validated.
181 /// \param samRecord record to be validated.
182 /// \param validationErrors status to append any errors to.
183 /// \return true if it is valid, false and appends to SamValidationErrors
184 /// if it is not
185 static bool isValid(SamFileHeader& samHeader, SamRecord& samRecord,
186 SamValidationErrors& validationErrors);
187
188 /// Determines whether or not the specified qname is valid.
189 /// Validation for QNAME is:
190 /// a) length of the qname string is the same as the read name length
191 /// b) length is between 1 and 254.
192 /// c) [ \t\n\r] are not allowed in the name.
193 /// \param qname the read/query name.
194 /// \param qnameLen length of the read name including the null (result of
195 /// SamRecord::getReadNameLength()).
196 /// \param validationErrors status to append any errors to.
197 /// \return true if it is valid, false and appends to SamValidationErrors
198 /// if it is not
199 static bool isValidQname(const char* qname, uint8_t qnameLen,
200 SamValidationErrors& validationErrors);
201
202 /// Determines whether or not the flag is valid.
203 /// TODO: currently no validation is done on the flag.
204 /// \param flag flag to be validated.
205 /// \param validationErrors status to append any errors to.
206 /// \return true if it is valid, false and appends to SamValidationErrors
207 /// if it is not
208 static bool isValidFlag(uint16_t flag,
209 SamValidationErrors& validationErrors);
210
211 /// Validate the reference name including validating against the header.
212 /// 1) Cross validate the rname and the header.
213 /// 2) perform the validation in the method that doesn't take the header.
214 /// \param samHeader header associated with the rname to be validated.
215 /// \param rname reference name to be validated.
216 /// \param validationErrors status to append any errors to.
217 /// \return true if it is valid, false and appends to SamValidationErrors
218 /// if it is not
219 static bool isValidRname(SamFileHeader& samHeader,
220 const char* rname,
221 SamValidationErrors& validationErrors);
222 /// Validate the rname without validating against the header.
223 /// Validation for RNAME is:
224 /// a) cannot be 0 length.
225 /// b) [ \t\n\r@=] are not allowed in the name.
226 /// \param rname reference name to be validated.
227 /// \param validationErrors status to append any errors to.
228 /// \return true if it is valid, false and appends to SamValidationErrors
229 /// if it is not
230 static bool isValidRname(const char* rname,
231 SamValidationErrors& validationErrors);
232
233 /// Validate whether or not the specified reference id is valid.
234 /// Validation for rID is:
235 /// a) must be between -1 and the number of refInfo.
236 /// -1 is allowed, and otherwise it must properly index into the array.
237 /// \param refID reference id to be validated.
238 /// \param refInfo sam reference information containing the mapping
239 /// from reference id to reference name for this refID.
240 /// \param validationErrors status to append any errors to.
241 /// \return true if it is valid, false and appends to SamValidationErrors
242 /// if it is not
243 static bool isValidRefID(int32_t refID, const SamReferenceInfo& refInfo,
244 SamValidationErrors& validationErrors);
245
246 /// Validate the reference position.
247 /// Validation for pos is:
248 /// a) must be between 0 and (2^29)-1.
249 /// \param pos position to be validated.
250 /// \param validationErrors status to append any errors to.
251 /// \return true if it is valid, false and appends to SamValidationErrors
252 /// if it is not
253 static bool isValid1BasedPos(int32_t pos,
254 SamValidationErrors& validationErrors);
255
256 /// Validate the mapping quality.
257 /// TODO: currently no validation is done on the mapping quality.
258 /// \param mapQuality mapping quality to be validated.
259 /// \param validationErrors status to append any errors to.
260 /// \return true if it is valid, false and appends to SamValidationErrors
261 /// if it is not
262 static bool isValidMapQuality(uint8_t mapQuality,
263 SamValidationErrors& validationErrors);
264
265 /// Validate the sequence, but not against the cigar or quality string.
266 /// Validation against cigar is done in isValidCigar.
267 /// Validation against the quality string is done in isValidQuality.
268 /// TODO: currently no validation is done in this method.
269 /// \param samRecord record whose sequence should be validated.
270 /// \param validationErrors status to append any errors to.
271 /// \return true if it is valid, false and appends to SamValidationErrors
272 /// if it is not
273 static bool isValidSequence(SamRecord& samRecord,
274 SamValidationErrors& validationErrors);
275
276 /// Validate the cigar. Cigar validation depends on sequence.
277 /// Validation for CIGAR is:
278 /// a) cannot be 0 length.
279 /// if not "*", validate the following:
280 /// b) must have an integer length for each operator (if not "*"). TODO
281 /// c) all operators must be valid (if not "*"). TODO
282 /// d) evaluates to the same read length as the sequence string.
283 /// \param samRecord record whose cigar should be validated.
284 /// \param validationErrors status to append any errors to.
285 /// \return true if it is valid, false and appends to SamValidationErrors
286 /// if it is not
287 static bool isValidCigar(SamRecord& samRecord,
288 SamValidationErrors& validationErrors);
289
290 /// Validate the cigar. Cigar validation depends on sequence.
291 /// Validation for CIGAR is:
292 /// a) cannot be 0 length.
293 /// if not "*", validate the following:
294 /// b) must have an integer length for each operator (if not "*"). TODO
295 /// c) all operators must be valid (if not "*"). TODO
296 /// d) evaluates to the same read length as the sequence string.
297 /// \param cigar cigar string to be validated.
298 /// \param sequence sequence to check the cigar against.
299 /// \param validationErrors status to append any errors to.
300 /// \return true if it is valid, false and appends to SamValidationErrors
301 /// if it is not
302 static bool isValidCigar(const char* cigar, const char* sequence,
303 SamValidationErrors& validationErrors);
304
305 /// Validate the cigar. Cigar validation depends on sequence.
306 /// Validation for CIGAR is:
307 /// a) cannot be 0 length.
308 /// if not "*", validate the following:
309 /// b) TODO: must have an integer length for each operator (if not "*").
310 /// c) TODO: all operators must be valid (if not "*").
311 /// d) evaluates to the same read length as the sequence string.
312 /// \param cigar cigar string to be validated.
313 /// \param seqLen sequence length to check the cigar against.
314 /// \param validationErrors status to append any errors to.
315 /// \return true if it is valid, false and appends to SamValidationErrors
316 /// if it is not
317 static bool isValidCigar(const char* cigar,
318 int seqLen,
319 SamValidationErrors& validationErrors);
320
321 /// TODO: validate the mate/next fragment's reference name.
322 /// \return true if it is valid, false and appends to SamValidationErrors
323 /// if it is not
324 static bool isValidMrnm();
325
326 /// TODO: validate the mate/next fragment's position.
327 /// \return true if it is valid, false and appends to SamValidationErrors
328 /// if it is not
329 static bool isValidMpos();
330
331 /// TODO: validate the insertion size/observed template length.
332 /// \return true if it is valid, false and appends to SamValidationErrors
333 /// if it is not
334 static bool isValidIsize();
335
336 /// TODO, validate the sequence.
337 /// \return true if it is valid, false and appends to SamValidationErrors
338 /// if it is not
339 static bool isValidSeq();
340
341 /// Validate the base quality.
342 /// Quality validation depends on sequence.
343 /// Validation for quality is:
344 /// a) quality & sequence are the same length if both are specified.
345 /// TODO: more validation.
346 /// \param samRecord record whose quality should be validated.
347 /// \param validationErrors status to append any errors to.
348 /// \return true if it is valid, false and appends to SamValidationErrors
349 /// if it is not
350 static bool isValidQuality(SamRecord& samRecord,
351 SamValidationErrors& validationErrors);
352
353 /// Validate the base quality.
354 /// Quality validation depends on sequence.
355 /// Validation for quality is:
356 /// a) quality & sequence are the same length if both are specified.
357 /// TODO: more validation.
358 /// \param quality quality string to be validated.
359 /// \param sequence sequence to check the quality against.
360 /// \param validationErrors status to append any errors to.
361 /// \return true if it is valid, false and appends to SamValidationErrors
362 /// if it is not
363 static bool isValidQuality(const char* quality, const char* sequence,
364 SamValidationErrors& validationErrors);
365
366 /// Validate the base quality.
367 /// Quality validation depends on sequence.
368 /// Validation for quality is:
369 /// a) quality & sequence are the same length if both are specified.
370 /// TODO: more validation.
371 /// \param quality quality string to be validated.
372 /// \param seqLength sequence length to check the quality against.
373 /// \param validationErrors status to append any errors to.
374 /// \return true if it is valid, false and appends to SamValidationErrors
375 /// if it is not
376 bool static isValidQuality(const char* quality,
377 int seqLength,
378 SamValidationErrors& validationErrors);
379
380 /// Validate the tags.
381 /// Validation for tags is:
382 /// a) check that the "MD" tag is correct if it is present.
383 /// TODO: more validation.
384 /// \param samRecord record whose tags should be validated.
385 /// \param validationErrors status to append any errors to.
386 /// \return true if it is valid, false and appends to SamValidationErrors
387 /// if it is not
388 static bool isValidTags(SamRecord& samRecord,
389 SamValidationErrors& validationErrors);
390
391 /// TODO validate the tag vtype
392 /// \return true if it is valid, false and appends to SamValidationErrors
393 /// if it is not
394 static bool isValidVtype();
395
396 /// TODO validate the tag value
397 /// \return true if it is valid, false and appends to SamValidationErrors
398 /// if it is not
399 static bool isValidValue();
400};
401
402
403#endif
This class allows a user to get/set the fields in a SAM/BAM Header.
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record.
Definition SamRecord.h:52
Class for tracking the reference information mapping between the reference ids and the reference name...
The SamValidationError class describes a validation error that occurred, containing the error type,...
Type getType() const
Return the type enum of this validation error object.
const char * getSeverityString() const
Return the string representing this object's severity of validation error.
void printError() const
Print a formatted output of the error to cerr.
void getErrorString(std::string &errorString) const
Get the error string representing this object's error.
Severity
Severity of the error.
@ WARNING
Warning is used if it is just an invalid value.
@ ERROR
Error is used if parsing could not succeed.
const char * getMessage() const
Return the error message of this validation error object.
Type
Type of the error.
@ INVALID_REF_ID
Invalid reference id.
@ INVALID_TAG
Invalid tag.
@ INVALID_QNAME
Invalid read/query name.
@ INVALID_MRNM
Invalid mate/next fragment reference name.
@ INVALID_CIGAR
Invalid CIGAR.
@ INVALID_MAPQ
Invalid mapping quality.
@ INVALID_POS
Invalid position.
@ INVALID_RNAME
Invalid reference name.
@ INVALID_QUAL
Invalid base quality.
Severity getSeverity() const
Return the severity enum of this validation error object.
const char * getTypeString() const
Return the string representing this object's type of validation error.
The SamValidationErrors class is a container class that holds SamValidationError Objects,...
void getErrorString(std::string &errorString) const
Append the error messages contained in this container to the passed in string.
const SamValidationError * getNextError()
Return a pointer to the next error without removing it from the container, and returning null once al...
SamValidationErrors()
Constructor.
void resetErrorIter()
Reset the iterator to the beginning of the errors.
void clear()
Remove all the errors from the container.
unsigned int numErrors()
Return the number of validation errors contained in this object.
void addError(SamValidationError::Type newType, SamValidationError::Severity newSeverity, const char *newMessage)
Add the specified error to this container.
~SamValidationErrors()
Destructor.
The SamValidator class contains static methods for validating the SAM/BAM Record and each of its fiel...
static bool isValidQname(const char *qname, uint8_t qnameLen, SamValidationErrors &validationErrors)
Determines whether or not the specified qname is valid.
static bool isValidVtype()
TODO validate the tag vtype.
static bool isValidTags(SamRecord &samRecord, SamValidationErrors &validationErrors)
Validate the tags.
static bool isValidFlag(uint16_t flag, SamValidationErrors &validationErrors)
Determines whether or not the flag is valid.
static bool isValidIsize()
TODO: validate the insertion size/observed template length.
static bool isValidQuality(SamRecord &samRecord, SamValidationErrors &validationErrors)
Validate the base quality.
static bool isValid(SamFileHeader &samHeader, SamRecord &samRecord, SamValidationErrors &validationErrors)
Validates whether or not the specified SamRecord is valid, calling all of the other validations.
static bool isValid1BasedPos(int32_t pos, SamValidationErrors &validationErrors)
Validate the reference position.
static bool isValidSeq()
TODO, validate the sequence.
static bool isValidMpos()
TODO: validate the mate/next fragment's position.
static bool isValidRname(SamFileHeader &samHeader, const char *rname, SamValidationErrors &validationErrors)
Validate the reference name including validating against the header.
static bool isValidRefID(int32_t refID, const SamReferenceInfo &refInfo, SamValidationErrors &validationErrors)
Validate whether or not the specified reference id is valid.
static bool isValidCigar(SamRecord &samRecord, SamValidationErrors &validationErrors)
Validate the cigar.
static bool isValidMapQuality(uint8_t mapQuality, SamValidationErrors &validationErrors)
Validate the mapping quality.
static bool isValidValue()
TODO validate the tag value.
static bool isValidSequence(SamRecord &samRecord, SamValidationErrors &validationErrors)
Validate the sequence, but not against the cigar or quality string.
static bool isValidMrnm()
TODO: validate the mate/next fragment's reference name.