// libStatGen Software 1 - SamFileHeader.cpp
1/*
2 * Copyright (C) 2010 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
#include "SamFileHeader.h"
#include "SamHeaderSQ.h"
#include "SamHeaderRG.h"

#include <new>
21
22
// Shared empty string. Its c_str() is what the getters in this file return
// when the requested record or tag is not present.
23const std::string SamFileHeader::EMPTY_RETURN = "";
24
25SamFileHeader::SamFileHeader()
26 : myHD(NULL),
27 myReferenceInfo(),
28 myErrorMessage("")
29{
30 resetHeader();
31
32 mySQs.setCaseSensitive(true);
33 myRGs.setCaseSensitive(true);
34 myPGs.setCaseSensitive(true);
35}
36
37
38SamFileHeader::~SamFileHeader()
39{
41}
42
43
44// Copy Constructor
45SamFileHeader::SamFileHeader(const SamFileHeader& header)
46{
47 copy(header);
48}
49
50
51// Overload operator = to copy the passed in header into this header.
53{
54 copy(header);
55 return(*this);
56}
57
58
60{
61 // Check to see if the passed in value is the same as this.
62 if(this == &header)
63 {
64 return(true);
65 }
66
68
69 // Copy the records by getting the other header's header string
70 // and parsing it.
71 std::string newString;
72 bool status = header.getHeaderString(newString);
73 String newHeaderString = newString.c_str();
74
75 status &= parseHeader(newHeaderString);
76
77 myCurrentHeaderIndex = header.myCurrentHeaderIndex;
78 myCurrentCommentIndex = header.myCurrentCommentIndex;
79
80 // Clear the reference info and copy it to ensure it is the same.
81 myReferenceInfo.clear();
82 // Copy Reference contigs, hash, lengths.
83 myReferenceInfo = header.myReferenceInfo;
84
85 return(status);
86}
87
88
89// Reset the header for a new entry, clearing out previous values.
91{
92 myReferenceInfo.clear();
93
94 // Clear the pointers to the header records. They are deleted when the
95 // vector is cleaned up.
96 myHD = NULL;
97 mySQs.Clear();
98 myRGs.Clear();
99 myPGs.Clear();
100
101 // Delete the header records and clear the vector.
102 for(unsigned int headerIndex = 0; headerIndex < myHeaderRecords.size();
103 headerIndex++)
104 {
105 if(myHeaderRecords[headerIndex] != NULL)
106 {
107 delete myHeaderRecords[headerIndex];
108 myHeaderRecords[headerIndex] = NULL;
109 }
110 }
111 myHeaderRecords.clear();
112
113 // Reset the iterator for the header lines.
115
116 // Reset the comment iterator.
118
119 // Reset the individual type header iterators.
123
124 // Clear the comments
125 myComments.clear();
126}
127
128
129// Set the passed in string to the entire header string. Clearing its
130// current contents.
131bool SamFileHeader::getHeaderString(std::string& header) const
132{
133 header.clear();
134
135 // Keep getting header lines until there are no more - false returned.
136 unsigned int index = 0;
137 while(getHeaderLine(index, header) != false)
138 {
139 ++index;
140 }
141
142 return(true);
143}
144
145
146int SamFileHeader::getReferenceID(const String & referenceName, bool addID)
147{
148 return(myReferenceInfo.getReferenceID(referenceName, addID));
149}
150
151
152int SamFileHeader::getReferenceID(const char* referenceName, bool addID)
153{
154 return(myReferenceInfo.getReferenceID(referenceName, addID));
155}
156
157
159{
160 return(myReferenceInfo.getReferenceLabel(id));
161}
162
163
164// Get the Reference Information
166{
167 return(myReferenceInfo);
168}
169
170
171// Get the Reference Information for updating separately when reading
172// BAMs...should only be called by BamInterface.
173SamReferenceInfo& SamFileHeader::getReferenceInfoForBamInterface()
174{
175 return(myReferenceInfo);
176}
177
178
179// Add a header line that has an const char* value.
180bool SamFileHeader::addHeaderLine(const char* type, const char* tag,
181 const char* value)
182{
183 String headerLine;
184 headerLine += "@";
185 headerLine += type;
186 headerLine += "\t";
187 headerLine += tag;
188 headerLine += ":";
189 headerLine += value;
190 return(addHeaderLine(headerLine.c_str()));
191}
192
193
194// Add a header line that is already preformatted in a const char*.
195bool SamFileHeader::addHeaderLine(const char* headerLine)
196{
197 // Parse the added header line.
198 String headerString = headerLine;
199 return(parseHeader(headerString));
200}
201
202
203// Add a header line that is already preformatted in a const char*.
204bool SamFileHeader::addHeader(const char* header)
205{
206 // Parse the added header line.
207 String headerString = header;
208 return(parseHeader(headerString));
209}
210
211
212// Add a comment.
213bool SamFileHeader::addComment(const char* comment)
214{
215 if((comment != NULL) && (strcmp(comment, EMPTY_RETURN.c_str()) != 0))
216 {
217 // Valid comment, so add it.
218 myComments.push_back(comment);
219 }
220 return(true);
221}
222
223
224// Add the specified tag and value to the HD header.
225bool SamFileHeader::setHDTag(const char* tag, const char* value)
226{
227 if(myHD == NULL)
228 {
229 // Need to create the HD line.
230 myHD = new SamHeaderHD();
231 if(myHD == NULL)
232 {
233 // New failed, return false.
234 myErrorMessage = "SamFileHeader: Failed to allocate a new HD tag";
235 return(false);
236 }
237 // Succeeded to create the line, add it to the
238 // list.
239 myHeaderRecords.push_back(myHD);
240 }
241 if(!myHD->setTag(tag, value))
242 {
243 myErrorMessage = "SamFileHeader: Failed to set the specified HD tag";
244 return(false);
245 }
246 return(true);
247}
248
249
250// Add the specified tag and value to the SQ header with the specified name.
251// If the header does not yet exist, the header is added.
252bool SamFileHeader::setSQTag(const char* tag, const char* value,
253 const char* name)
254{
255 // Get the SQ record for the specified name.
256 SamHeaderSQ* sq = getSQ(name);
257 if(sq == NULL)
258 {
259 // The SQ does not yet exist.
260 // Make sure the tag is LN.
261 if(strcmp(tag, "LN") != 0)
262 {
263 // LN is required so must be the first tag added
264 myErrorMessage =
265 "SamFileHeader:Failed to add the specified SQ key, LN not specified.";
266 return(false);
267 }
268
269 // Add it.
270 sq = new SamHeaderSQ();
271
272 if(sq == NULL)
273 {
274 // Could not create the header record.
275 myErrorMessage = "SamFileHeader: Failed to allocate a new SQ tag";
276 return(false);
277 }
278
279 // Created the header record, so add it to the list of SQ lines.
280 mySQs.Add(name, sq);
281 myHeaderRecords.push_back(sq);
282 // value is the length, so update the reference info.
283 myReferenceInfo.add(name, atoi(value));
284
285 // Add the key tag
286 if(!sq->addKey(name))
287 {
288 // Failed to add the key tag, return false.
289 myErrorMessage = "SamFileHeader:Failed to add the specified SQ key";
290 return(false);
291 }
292 }
293 else if(strcmp(tag, "LN") == 0)
294 {
295 // Cannot modify/remove the LN tag.
296 myErrorMessage = "SamFileHeader:Cannot modify/remove the SQ's LN tag";
297 return(false);
298 }
299
300 if(!sq->setTag(tag, value))
301 {
302 myErrorMessage = "Failed to set the specified SQ tag";
303 return(false);
304 }
305 return(true);
306}
307
308
309// Add the specified tag and value to the RG header with the read group
310// identifier. If the header does not yet exist, the header is added.
311bool SamFileHeader::setRGTag(const char* tag, const char* value, const char* id)
312{
313 // Get the RG record for the specified name.
314 SamHeaderRG* rg = getRG(id);
315 if(rg == NULL)
316 {
317 // The RG does not yet exist.
318 // Add it.
319 rg = new SamHeaderRG();
320
321 if(rg == NULL)
322 {
323 // Could not create the header record.
324 myErrorMessage = "Failed to allocate a new RG tag";
325 return(false);
326 }
327
328 // Created the header record, so add it to the list of RG lines.
329 myRGs.Add(id, rg);
330 myHeaderRecords.push_back(rg);
331
332 // Add the key tag
333 if(!rg->addKey(id))
334 {
335 // Failed to add the key tag, return false.
336 myErrorMessage = "Failed to add the specified RG key";
337 return(false);
338 }
339 }
340
341 if(!rg->setTag(tag, value))
342 {
343 myErrorMessage = "Failed to set the specified RG tag";
344 return(false);
345 }
346 return(true);
347}
348
349
350// Add the specified tag and value to the PG header with the specified id.
351// If the header does not yet exist, the header is added.
352// Add the specified tag and value to the PG header.
353bool SamFileHeader::setPGTag(const char* tag, const char* value, const char* id)
354{
355 // Get the PG record for the specified name.
356 SamHeaderPG* pg = getPG(id);
357 if(pg == NULL)
358 {
359 // The PG does not yet exist.
360 // Add it.
361 pg = new SamHeaderPG();
362
363 if(pg == NULL)
364 {
365 // Could not create the header record.
366 myErrorMessage = "Failed to allocate a new PG tag";
367 return(false);
368 }
369
370 // Created the header record, so add it to the list of PG lines.
371 myPGs.Add(id, pg);
372 myHeaderRecords.push_back(pg);
373
374 // Add the key tag
375 if(!pg->addKey(id))
376 {
377 // Failed to add the key tag, return false.
378 myErrorMessage = "Failed to add the specified PG key";
379 return(false);
380 }
381 }
382
383 if(!pg->setTag(tag, value))
384 {
385 myErrorMessage = "Failed to set the specified PG tag";
386 return(false);
387 }
388 return(true);
389}
390
391
392// Add the HD record to the header.
394{
395 // If there is already an HD header or if null
396 // was passed in, return false.
397 if(myHD != NULL)
398 {
399 myErrorMessage = "Failed add an HD tag - there is already one";
400 return(false);
401 }
402 if(hd == NULL)
403 {
404 myErrorMessage = "Failed add an HD tag - no tag specified";
405 return(false);
406 }
407 myHD = hd;
408
409 myHeaderRecords.push_back(myHD);
410 return(true);
411}
412
413
414// Add the SQ record to the header.
416{
417 if(sq == NULL)
418 {
419 // null pointer passed in, can't add it.
420 myErrorMessage = "SAM/BAM Header line failed to allocate SQ.";
421 return(false);
422 }
423 const char* name = sq->getTagValue("SN");
424 const char* length = sq->getTagValue("LN");
425 if(strcmp(name, EMPTY_RETURN.c_str()) == 0)
426 {
427 // SN is not set, so can't add it.
428 myErrorMessage =
429 "SAM/BAM Header line failure: Skipping SQ line that is missing the SN field.";
430 return(false);
431 }
432 if(strcmp(length, EMPTY_RETURN.c_str()) == 0)
433 {
434 // LN is not set, so can't add it.
435 myErrorMessage =
436 "SAM/BAM Header line failure: Skipping SQ line that is missing the LN field.";
437 return(false);
438 }
439
440 // Determine whether or not a record with this
441 // key is already in the hash.
442 if(mySQs.Find(name) < 0)
443 {
444 // It is not already in the hash so add it.
445 mySQs.Add(name, sq);
446 myHeaderRecords.push_back(sq);
447 myReferenceInfo.add(name, atoi(length));
448 return(true);
449 }
450
451 // It is already in the hash, so cannot be added.
452 myErrorMessage = "SAM/BAM Header line failure: Skipping SQ line that has a repeated SN field.";
453 return(false);
454}
455
456
457// Add the RG record to the header.
459{
460 if(rg == NULL)
461 {
462 // null pointer passed in, can't add it.
463 myErrorMessage = "SAM/BAM Header line failed to allocate RG.";
464 return(false);
465 }
466 const char* id = rg->getTagValue("ID");
467 if(strcmp(id, EMPTY_RETURN.c_str()) == 0)
468 {
469 // ID is not set, so can't add it.
470 myErrorMessage = "SAM/BAM Header line failure: Skipping RG line that is missing the ID field.";
471 return(false);
472 }
473
474 // Determine whether or not a record with this
475 // key is already in the hash.
476 if(myRGs.Find(id) < 0)
477 {
478 // It is not already in the hash so
479 // add it.
480 myRGs.Add(id, rg);
481 myHeaderRecords.push_back(rg);
482 return(true);
483 }
484
485 // It is already in the hash, so cannot be added.
486 myErrorMessage = "SAM/BAM Header line failure: Skipping RG line that has a repeated ID field.";
487 return(false);
488}
489
490
491// Add the PG record to the header.
493{
494 // If a null pointer was passed in, return false.
495 if(pg == NULL)
496 {
497 myErrorMessage = "SAM/BAM Header line failed to allocate PG.";
498 return(false);
499 }
500 const char* id = pg->getTagValue("ID");
501 if(strcmp(id, EMPTY_RETURN.c_str()) == 0)
502 {
503 // ID is not set, so can't add the header record.
504 myErrorMessage = "SAM/BAM Header line failure: Skipping PG line that is missing the ID field.";
505 return(false);
506 }
507
508 // Determine whether or not a record with this
509 // key is already in the hash.
510 if(myPGs.Find(id) < 0)
511 {
512 // It is not already in the hash so
513 // add it.
514 myPGs.Add(id, pg);
515 myHeaderRecords.push_back(pg);
516 return(true);
517 }
518
519 // It is already in the hash, so cannot be added.
520 myErrorMessage = "SAM/BAM Header line failure: Skipping PG line that has a repeated ID field.";
521 return(false);
522}
523
524
525// Add a copy of the passed in header record (hdrRec) to the header.
// The copy is created with hdrRec.createCopy() and handed to addHD, addPG,
// addRG or addSQ; an unrecognised type sets myErrorMessage and returns false.
// NOTE(review): the line carrying this function's signature is not present
// here (the embedded numbering skips 526), and neither are the `case` labels
// that should precede each add* call (numbers 532, 535, 538 and 541).
// Restore them from the original file before building.
// NOTE(review): nothing here frees newRec when the add* call fails or the
// type is unknown - confirm who owns the copy on failure.
527{
528 SamHeaderRecord* newRec = hdrRec.createCopy();
529 bool returnVal = true;
530 switch(newRec->getType())
531 {
533 returnVal = addHD((SamHeaderHD*)newRec);
534 break;
536 returnVal = addPG((SamHeaderPG*)newRec);
537 break;
539 returnVal = addRG((SamHeaderRG*)newRec);
540 break;
542 returnVal = addSQ((SamHeaderSQ*)newRec);
543 break;
544 default:
545 myErrorMessage = "Failed to copy a header record, unknown type.";
546 returnVal = false;
547 break;
548 }
549 return(returnVal);
550}
551
552
553// Remove the HD record.
555{
556 if(myHD != NULL)
557 {
558 // Reset the record. Do not delete it since it is in the headerRecords
559 // vector and it is not worth the time to remove it from the middle of
560 // that vector since this is the header and the space does not need
561 // to be conserved.
562 myHD->reset();
563
564 // Set myHD to null so a new HD could be added.
565 myHD = NULL;
566 }
567
568 return(true);
569}
570
571
572// Remove the SQ record associated with the specified name.
573bool SamFileHeader::removeSQ(const char* name)
574{
575 // Look up the name in the hash.
576 int hashIndex = mySQs.Find(name);
577 if(hashIndex < 0)
578 {
579 // Not found in the hash, so nothing to
580 // delete, return true it does not exist
581 // in the hash.
582 return(true);
583 }
584
585 // Get the SQ.
586 SamHeaderSQ* sq = (SamHeaderSQ*)(mySQs.Object(hashIndex));
587
588 if(sq == NULL)
589 {
590 // sq is null, this is an error since hashIndex was greater than 0,
591 // so it should have been found.
592 myErrorMessage = "SAM/BAM Header line failed to get SQ object.";
593 return(false);
594 }
595
596 // Reset the record. Do not delete it since it is in the headerRecords
597 // vector and it is not worth the time to remove it from the middle of
598 // that vector since this is the header and the space does not need
599 // to be conserved.
600 sq->reset();
601
602 // Delete the entry from the hash.
603 mySQs.Delete(hashIndex);
604
605 return(true);
606}
607
608
609// Remove the RG record associated with the specified id.
610bool SamFileHeader::removeRG(const char* id)
611{
612 // Look up the id in the hash.
613 int hashIndex = myRGs.Find(id);
614 if(hashIndex < 0)
615 {
616 // Not found in the hash, so nothing to
617 // delete, return true it does not exist
618 // in the hash.
619 return(true);
620 }
621
622 // Get the RG.
623 SamHeaderRG* rg = (SamHeaderRG*)(myRGs.Object(hashIndex));
624
625 if(rg == NULL)
626 {
627 // rg is null, this is an error since hashIndex was greater than 0,
628 // so it should have been found.
629 myErrorMessage = "SAM/BAM Header line failed to get RG object.";
630 return(false);
631 }
632
633 // Reset the record. Do not delete it since it is in the headerRecords
634 // vector and it is not worth the time to remove it from the middle of
635 // that vector since this is the header and the space does not need
636 // to be conserved.
637 rg->reset();
638
639 // Delete the entry from the hash.
640 myRGs.Delete(hashIndex);
641
642 return(true);
643}
644
645
646// Remove the PG record associated with the specified id.
647bool SamFileHeader::removePG(const char* id)
648{
649 // Look up the id in the hash.
650 int hashIndex = myPGs.Find(id);
651 if(hashIndex < 0)
652 {
653 // Not found in the hash, so nothing to
654 // delete, return true it does not exist
655 // in the hash.
656 return(true);
657 }
658
659 // Get the PG.
660 SamHeaderPG* pg = (SamHeaderPG*)(myPGs.Object(hashIndex));
661
662 if(pg == NULL)
663 {
664 // pg is null, this is an error since hashIndex was greater than 0,
665 // so it should have been found.
666 myErrorMessage = "SAM/BAM Header line failed to get PG object.";
667 return(false);
668 }
669
670 // Reset the record. Do not delete it since it is in the headerRecords
671 // vector and it is not worth the time to remove it from the middle of
672 // that vector since this is the header and the space does not need
673 // to be conserved.
674 pg->reset();
675
676 // Delete the entry from the hash.
677 myPGs.Delete(hashIndex);
678
679 return(true);
680}
681
682
683const char* SamFileHeader::getHDTagValue(const char* tag)
684{
685 if(myHD == NULL)
686 {
687 // return blank since there is no HD type.
688 return(EMPTY_RETURN.c_str());
689 }
690 return(myHD->getTagValue(tag));
691}
692
693
694// Get the value associated with the specified tag on the SQ line with
695// the specified sequence name.
696const char* SamFileHeader::getSQTagValue(const char* tag, const char* name)
697{
698 // Look up the name in the hash to get the associated SQ object.
699 SamHeaderSQ* sq = (SamHeaderSQ*)(mySQs.Object(name));
700
701 // If it is NULL - the tag was not found, so return
702 if(sq == NULL)
703 {
704 return(EMPTY_RETURN.c_str());
705 }
706
707 // Found the object, so return the SQ Tag.
708 return(sq->getTagValue(tag));
709}
710
711
712// Get the value associated with the specified tag on the RG line with
713// the specified read group identifier.
714const char* SamFileHeader::getRGTagValue(const char* tag, const char* id)
715{
716 // Look up the id in the hash to get the associated RG object.
717 SamHeaderRG* rg = (SamHeaderRG*)(myRGs.Object(id));
718
719 // If it is NULL - the tag was not found, so return
720 if(rg == NULL)
721 {
722 return(EMPTY_RETURN.c_str());
723 }
724
725 // Found the object, so return the RG Tag.
726 return(rg->getTagValue(tag));
727}
728
729
730const char* SamFileHeader::getPGTagValue(const char* tag, const char* id)
731{
732 // Look up the id in the hash to get the associated PG object.
733 SamHeaderPG* pg = (SamHeaderPG*)(myPGs.Object(id));
734
735 // If it is NULL - the tag was not found, so return
736 if(pg == NULL)
737 {
738 return(EMPTY_RETURN.c_str());
739 }
740
741 // Found the object, so return the PG Tag.
742 return(pg->getTagValue(tag));
743}
744
745
746// Get the number of SQ objects.
748{
749 return(mySQs.Entries());
750}
751
752
753// Get the number of RG objects.
755{
756 return(myRGs.Entries());
757}
758
759
760// Get the number of PG objects.
762{
763 return(myPGs.Entries());
764}
765
766
767// Get the HD object.
769{
770 return(myHD);
771}
772
773
774// Get the SQ object with the specified sequence name.
776{
777 return((SamHeaderSQ*)(mySQs.Object(name)));
778}
779
780
781// Get the RG object with the specified read group identifier.
783{
784 return((SamHeaderRG*)(myRGs.Object(id)));
785}
786
787
788// Get the PG object.
790{
791 return((SamHeaderPG*)(myPGs.Object(id)));
792}
793
794
795// Return the value of the SO tag.
796// If this field does not exist, EMPTY_RETURN.c_str() is returned.
798{
799 if(myHD == NULL)
800 {
801 // No HD, so return blank EMPTY_RETURN.c_str()
802 return(EMPTY_RETURN.c_str());
803 }
804 return(myHD->getSortOrder());
805}
806
807
808// Deprecated way of getting the sort order from the file.
810{
811 return(getSortOrder());
812}
813
814
815// Get the next SQ header record. After all SQ headers have been retrieved,
816// NULL is returned until a reset is called.
822
823
824// Get the next RG header record. After all RG headers have been retrieved,
825// NULL is returned until a reset is called.
831
832
833// Get the next PG header record. After all PG headers have been retrieved,
834// NULL is returned until a reset is called.
840
841
842// Reset to the beginning of the header records so the next call
843// to getNextSQRecord returns the first SQ header record.
845{
846 myCurrentSQIndex = 0;
847}
848
849
850// Reset to the beginning of the header records so the next call
851// to getNextRGRecord returns the first RG header record.
853{
854 myCurrentRGIndex = 0;
855}
856
857
858// Reset to the beginning of the header records so the next call
859// to getNextPGRecord returns the first PG header record.
861{
862 myCurrentPGIndex = 0;
863}
864
865
866// Get the next active header record of the specified type.
867// Pass in the index to start looking at and the type to look for.
868// The index is advanced past every record that is examined.
869// After all headers of that type have been retrieved,
870// NULL is returned until a reset is called for that type.
// NOTE(review): the line carrying this function's signature is not present
// here (the embedded numbering skips 871-872); the body uses parameters named
// `index` and `headerType`.  Restore the signature from the original file.
873{
874 SamHeaderRecord* foundRecord = NULL;
875 // Loop until a record is found or until out of range of the
876 // headerRecord vector.
877 while((index < myHeaderRecords.size())
878 && (foundRecord == NULL))
879 {
880 // Get the next record.
881 foundRecord = myHeaderRecords[index];
882 // Either way, increment the index.
883 ++index;
884 // Skip records that report themselves as inactive.
885 if(!foundRecord->isActiveHeaderRecord())
886 {
887 // Not active, so clear the pointer.
888 foundRecord = NULL;
889 }
890 // Check to see if the record is the right type.
891 else if(foundRecord->getType() != headerType)
892 {
893 // Not the right type, so clear the pointer.
894 foundRecord = NULL;
895 }
896 }
897
898 // Return the record if it was found. Will be null if none were found.
899 return(foundRecord);
900}
901
902
903// Get the next header record. After all headers have been retrieved,
904// NULL is returned until a reset is called. Does not return the
905// Comment lines.
906// NOTE: both getNextHeaderRecord and getNextHeaderLine increment the
907// same iterator.
909{
910 // Get the next header record
911 SamHeaderRecord* foundRecord = NULL;
912 // Loop until a record is found or until out of range of the
913 // headerRecord vector.
914 while((myCurrentHeaderIndex < myHeaderRecords.size())
915 && (foundRecord == NULL))
916 {
917 // Get the next record.
918 foundRecord = myHeaderRecords[myCurrentHeaderIndex];
919 // Either way, increment the index.
920 ++myCurrentHeaderIndex;
921 // Check to see if the next record is active.
922 if(!foundRecord->isActiveHeaderRecord())
923 {
924 // Not active, so clear the pointer.
925 foundRecord = NULL;
926 }
927 }
928
929 // Return the record if it was found. Will be null if none were found.
930 return(foundRecord);
931}
932
933
934// Set the passed in string to the next header line. The passed in
935// string will be overwritten. If there are no more header lines or there
936// is an error, false is returned and the passed in string is set to EMPTY_RETURN.c_str()
937// until a rest is called.
938// Will also return the comment lines.
939// NOTE: both getNextHeaderRecord and getNextHeaderLine increment the
940// same iterator.
941bool SamFileHeader::getNextHeaderLine(std::string &headerLine)
942{
943 headerLine = EMPTY_RETURN.c_str();
944
945 // Until the header is set, keep reading.
946 // Header could return EMPTY_RETURN.c_str() if the header line is blank.
947 while(headerLine == EMPTY_RETURN.c_str())
948 {
949 if(getHeaderLine(myCurrentHeaderIndex, headerLine) == false)
950 {
951 // getHeaderLine failed, so stop processing, and return false.
952 return(false);
953 }
954 else
955 {
956 // In range, increment the index.
957 ++myCurrentHeaderIndex;
958 }
959 }
960 return(true);
961}
962
963
964// Reset to the beginning of the header records so the next call
965// to getNextHeaderRecord returns the first header record.
// getNextHeaderLine reads the same index, so it restarts as well.
// NOTE(review): the line carrying this function's signature is not present
// here (the embedded numbering skips 966); restore it from the original file.
967{
968 myCurrentHeaderIndex = 0;
969}
970
971
972void SamFileHeader::appendCommentLines(std::string &commentLines)
973{
974 for(unsigned int i = 0; i < myComments.size(); i++)
975 {
976 commentLines += "@CO\t";;
977 commentLines += myComments[i];
978 commentLines += "\n";
979 }
980}
981
982
983// Returns the comment on the next comment line. Returns EMPTY_RETURN.c_str() if all comment
984// lines have been returned, until resetCommentIter is called.
986{
987 if(myCurrentCommentIndex < myComments.size())
988 {
989 return(myComments[myCurrentCommentIndex++].c_str());
990 }
991 // Already gone through all the comments, return EMPTY_RETURN.c_str().
992 return(EMPTY_RETURN.c_str());
993}
994
995
996// Resets to the beginning of the comments so getNextComment returns
997// the first comment.
999{
1000 myCurrentCommentIndex = 0;
1001}
1002
1003
1004// Parse the header.
1005bool SamFileHeader::parseHeader(String& header)
1006{
1007 std::string errorMessage = "";
1008 int numErrors = 0;
1009 int numValid = 0;
1010
1011 // Split the header into lines.
1012 std::vector<String>* types = header.Split('\n');
1013
1014 // Loop through each header line, parsing that line.
1015 for(uint32_t index = 0; index < types->size(); index++)
1016 {
1017 // Parse the header line.
1018 if(!parseHeaderLine(types->at(index)))
1019 {
1020 errorMessage += myErrorMessage;
1021 errorMessage += "\n";
1022 ++numErrors;
1023 }
1024 else
1025 {
1026 // valid header line
1027 ++numValid;
1028 }
1029 }
1030
1031 // Delete the types vector.
1032 delete types;
1033 types = NULL;
1034
1035 myErrorMessage = errorMessage;
1036 if((numErrors > 0) && (numValid == 0))
1037 {
1038 // Only errors.
1039 std::cerr << numErrors
1040 << " invalid SAM/BAM Header lines were skipped due to:\n"
1041 << errorMessage << std::endl;
1042 return(false);
1043 }
1044 else if(numErrors > 0)
1045 {
1046 // Some valid & some invalid.
1047 // Going to return true, but add note about the invalid lines.
1048 std::cerr << numErrors
1049 << " invalid SAM/BAM Header lines were skipped due to:\n"
1050 << errorMessage << std::endl;
1051 }
1052
1053 return(true);
1054}
1055
1056
1057// Parse one line of the header.
1058bool SamFileHeader::parseHeaderLine(const String& headerLine)
1059{
1060 // Check if the line starts with @CO.
1061 if((headerLine.Length() >= 4) && (headerLine[0] == '@') &&
1062 (headerLine[1] == 'C') && (headerLine[2] == 'O') &&
1063 (headerLine[3] == '\t'))
1064 {
1065 // Comment line.
1066 String comment = headerLine.SubStr(4);
1067 return(addComment(comment));
1068 }
1069
1070 StringArray tokens;
1071
1072 // Split the line by tabs.
1073 tokens.ReplaceColumns(headerLine, '\t');
1074
1075 if(tokens.Length() < 1)
1076 {
1077 // Nothing on this line, just return true.
1078 return(true);
1079 }
1080
1081 // Get the header type, the first column.
1082 if((tokens[0].Length() != 3) || (tokens[0][0] != '@'))
1083 {
1084 // The header type string is incorrect. Should be 3 characters
1085 // with the first one @.
1086 myErrorMessage = "SAM/BAM Header line does not start with @ & at least 2 chars.";
1087 return(false);
1088 }
1089
1090 bool status = true;
1091 if(tokens[0] == "@HD")
1092 {
1093 if(myHD == NULL)
1094 {
1095 // Create a new hd.
1096 myHD = new SamHeaderHD();
1097 if(myHD == NULL)
1098 {
1099 // Failed to allocate HD, so return false.
1100 myErrorMessage = "SAM/BAM Header line failed to allocate HD.";
1101 return(false);
1102 }
1103 myHeaderRecords.push_back(myHD);
1104 if(!myHD->setFields(tokens))
1105 {
1106 myErrorMessage = "SAM/BAM Header line failed to store HD record.";
1107 status = false;
1108 }
1109 }
1110 else
1111 {
1112 // HD already set, so return false.
1113 myErrorMessage = "SAM/BAM Header line failure: multiple HD records.";
1114 status = false;
1115 }
1116 }
1117 else if(tokens[0] == "@SQ")
1118 {
1119 // Create a new SQ record.
1120 SamHeaderSQ* sq = new SamHeaderSQ();
1121
1122 if(sq->setFields(tokens))
1123 {
1124 // sq fields were properly set, so add it to the list of
1125 // SQ lines.
1126 // myStatus set in the method.
1127 status &= addSQ(sq);
1128 }
1129 else
1130 {
1131 myErrorMessage = "SAM/BAM Header line failed to store SQ record.";
1132 status = false;
1133 }
1134 }
1135 else if(tokens[0] == "@RG")
1136 {
1137 // Create a new RG record.
1138 SamHeaderRG* rg = new SamHeaderRG();
1139
1140 if(rg->setFields(tokens))
1141 {
1142 // rg fields were properly set, so add it to the list of
1143 // RG lines.
1144 // myStatus set in the method.
1145 status &= addRG(rg);
1146 }
1147 else
1148 {
1149 myErrorMessage = "SAM/BAM Header line failed to store RG record.";
1150 status = false;
1151 }
1152 }
1153 else if(tokens[0] == "@PG")
1154 {
1155 // Create a new PG record.
1156 SamHeaderPG* pg = new SamHeaderPG();
1157
1158 if(pg->setFields(tokens))
1159 {
1160 // pg fields were properly set, so add it to the list of
1161 // PG lines.
1162 // myStatus set in the method.
1163 status &= addPG(pg);
1164 }
1165 else
1166 {
1167 myErrorMessage = "SAM/BAM Header line failed to store PG record.";
1168 status = false;
1169 }
1170 }
1171 else
1172 {
1173 // Unknown header type.
1174 myErrorMessage =
1175 "SAM/BAM Header line failure: Skipping unknown header type, ";
1176 myErrorMessage += (const char*)(tokens[0]);
1177 status = false;
1178 }
1179 return(status);
1180}
1181
1182
1183
1184// Set the passed in string to the header line at the specified index.
1185// It does NOT clear the current contents of header.
1186// NOTE: some indexes will return blank if the entry was deleted.
1187bool SamFileHeader::getHeaderLine(unsigned int index, std::string& header) const
1188{
1189 // Check to see if the index is in range of the header records vector.
1190 if(index < myHeaderRecords.size())
1191 {
1192 // In range of the header records vector, so get the string for
1193 // that record.
1194 SamHeaderRecord* hdrRec = myHeaderRecords[index];
1195 hdrRec->appendString(header);
1196 return(true);
1197 }
1198 else
1199 {
1200 unsigned int commentIndex = index - myHeaderRecords.size();
1201 // Check to see if it is in range of the comments.
1202 if(commentIndex < myComments.size())
1203 {
1204 // It is in range of the comments, so add the type.
1205 header += "@CO\t";
1206 // Add the comment.
1207 header += myComments[commentIndex];
1208 // Add the new line.
1209 header += "\n";
1210 return(true);
1211 }
1212 }
1213 // Invalid index.
1214 return(false);
1215}
This class allows a user to get/set the fields in a SAM/BAM Header.
SamHeaderPG * getPG(const char *id)
Get the PG object with the specified id, returning NULL if there is no PG object with that key.
bool addPG(SamHeaderPG *pg)
Add the PG record to the header.
const char * getSortOrder()
Return the Sort Order value that is set in the Header, returning "" if this field does not exist.
const char * getSQTagValue(const char *tag, const char *name)
Get the value associated with the specified tag on the SQ line with the specified sequence name,...
SamHeaderSQ * getSQ(const char *name)
Get the SQ object with the specified sequence name, returning NULL if there is no SQ object with that...
SamHeaderHD * getHD()
Get the HD object, returning NULL if there is no HD record.
void resetRGRecordIter()
Reset to the beginning of the header records so the next call to getNextRGRecord returns the first RG...
bool setPGTag(const char *tag, const char *value, const char *id)
Set the specified tag to the specified value in the PG header with the specified id,...
const char * getHDTagValue(const char *tag)
Returns the value associated with the specified HD tag, returning "" if the tag does not exist in the...
bool addRG(SamHeaderRG *rg)
Add the RG record to the header.
bool getNextHeaderLine(std::string &headerLine)
Set the passed in string to the next header line, overwriting the passed in string.
SamHeaderRecord * getNextPGRecord()
Get the next PG header record.
int getReferenceID(const String &referenceName, bool addID=false)
Get the reference ID for the specified reference name (chromosome).
bool removePG(const char *id)
Remove PG record with the specified key.
bool addSQ(SamHeaderSQ *sq)
Add the SQ record to the header.
int getNumSQs()
Get the number of SQ objects.
bool removeRG(const char *id)
Remove RG record with the specified key.
bool setSQTag(const char *tag, const char *value, const char *name)
Set the specified tag to the specified value in the SQ header with the specified name,...
bool addComment(const char *comment)
Add the specified comment to the header (do not include "@CO" or "\n").
int getNumRGs()
Get the number of RG objects.
const char * getTagSO()
DEPRECATED.
SamHeaderRecord * getNextHeaderRecord()
Get the next header record, but not comment line.
bool addHD(SamHeaderHD *hd)
Add the HD record to the header.
const char * getNextComment()
Returns the comment on the next comment line.
const char * getRGTagValue(const char *tag, const char *id)
Get the value associated with the specified tag on the RG line with the specified read group identifi...
const String & getReferenceLabel(int id) const
Return the reference name (chromosome) for the specified reference id.
bool addHeaderLine(const char *type, const char *tag, const char *value)
Add a header line that is just one tag with a const char* value.
const SamReferenceInfo & getReferenceInfo() const
Get the Reference Information.
bool removeHD()
Remove the HD record.
void resetSQRecordIter()
Reset to the beginning of the header records so the next call to getNextSQRecord returns the first SQ...
bool setRGTag(const char *tag, const char *value, const char *id)
Set the specified tag to the specified value in the RG header with the specified id,...
bool getHeaderString(std::string &header) const
Set the passed in string to the entire header string, clearing its current contents.
bool copy(const SamFileHeader &header)
Copy method copies the passed in header into this header.
void appendCommentLines(std::string &commentLines)
Append all of the comment lines to the specified string.
void resetHeaderRecordIter()
Reset to the beginning of the header records so the next call to getNextHeaderRecord returns the firs...
int getNumPGs()
Get the number of PG objects.
SamHeaderRecord * getNextRGRecord()
Get the next RG header record.
SamHeaderRecord * getNextSQRecord()
Get the next SQ header record.
void resetHeader()
Initialize the header.
bool removeSQ(const char *name)
Remove SQ record with the specified key.
const char * getPGTagValue(const char *tag, const char *id)
Get the value associated with the specified tag on the PG line with the specified id,...
bool addHeader(const char *header)
Add a header that is already preformatted in a const char*.
void resetPGRecordIter()
Reset to the beginning of the header records so the next call to getNextPGRecord returns the first PG...
SamHeaderRG * getRG(const char *id)
Get the RG object with the specified read group identifier, returning NULL if there is no RG object w...
bool addRecordCopy(const SamHeaderRecord &hdrRec)
Add a copy of the specified header record to the header.
bool setHDTag(const char *tag, const char *value)
Set the specified tag to the specified value in the HD header; remove the tag by specifying value="".
SamFileHeader & operator=(const SamFileHeader &header)
Overload operator = to copy the passed in header into this header.
void resetCommentIter()
Resets to the beginning of the comments so getNextComment returns the first comment.
This class encapsulates the tag/value pairs contained within a SAM Header line with accessors for getti...
const char * getTagValue(const char *tag) const
Return the value associated with the specified tag.
void reset()
Reset this header record to an empty state with no tags.
SamHeaderRecordType getType()
Return the type of this header record (HD, SQ, RG, or PG) as an enum.
bool setFields(const StringArray &tokens)
Set the fields from the passed in line.
bool isActiveHeaderRecord()
This record is active (true) if there is at least one tag set.
virtual SamHeaderRecord * createCopy() const =0
Return a pointer to a newly created header record of the appropriate type that is a copy of this reco...
bool appendString(std::string &header)
Appends the string representation of this header record to the passed in string.
bool addKey(const char *value)
Add the key tag with the specified value (not for HD headers).
SamHeaderRecordType
Specifies the Type for the sam header record (line).
@ SQ
Sequence Dictionary.
@ RG
Read Group.
bool setTag(const char *tag, const char *value)
Set the value of the specified tag to the specified value, deletes the tag when value is NULL.
Class for tracking the reference information mapping between the reference ids and the reference name...
void clear()
Reset this reference info.
int getReferenceID(const String &referenceName, bool addID=false)
Get the reference ID for the specified name, if addID is set to true, a reference id will be created ...
const String & getReferenceLabel(int id) const
Get the reference name for the specified id; if the id is not found, return "*".
void add(const char *referenceSequenceName, int32_t referenceSequenceLength)
Add reference sequence name and reference sequence length.