libStatGen Software 1
Loading...
Searching...
No Matches
BamInterface.cpp
1/*
2 * Copyright (C) 2010 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include "BamInterface.h"
19#include "CharBuffer.h"
20
21BamInterface::BamInterface()
22{
23}
24
25
26BamInterface::~BamInterface()
27{
28}
29
30
31// Read a BAM file's header.
32bool BamInterface::readHeader(IFILE filePtr, SamFileHeader& header,
33 SamStatus& status)
34{
35 if(filePtr == NULL)
36 {
37 // File is not open, return false.
39 "Cannot read header since the file pointer is null");
40 return(false);
41 }
42 if(filePtr->isOpen() == false)
43 {
45 "Cannot read header since the file is not open");
46 return(false);
47 }
48
49 // Clear the passed in header.
50 header.resetHeader();
51
52 int32_t headerLength;
53 int readSize = ifread(filePtr, &headerLength, sizeof(headerLength));
54
55 if(readSize != sizeof(headerLength))
56 {
57 String errMsg = "Failed to read the BAM header length, read ";
58 errMsg += readSize;
59 errMsg += " bytes instead of ";
60 errMsg += (unsigned int)sizeof(headerLength);
61 status.setStatus(SamStatus::FAIL_IO, errMsg.c_str());
62 return(false);
63 }
64
65 String headerStr;
66 if(headerLength > 0)
67 {
68 // Read the header.
69 readSize =
70 ifread(filePtr, headerStr.LockBuffer(headerLength + 1), headerLength);
71 headerStr[headerLength] = 0;
72 headerStr.UnlockBuffer();
73 if(readSize != headerLength)
74 {
75 // Failed to read the header.
76 status.setStatus(SamStatus::FAIL_IO, "Failed to read the BAM header.");
77 return(false);
78 }
79 }
80
81 // Parse the header that was read.
82 if(!header.addHeader(headerStr))
83 {
84 // Status is set in the method on failure.
86 return(false);
87 }
88
89 int referenceCount;
90 // Read the number of references sequences.
91 ifread(filePtr, &referenceCount, sizeof(int));
92
93 // Get and clear the reference info so it can be set
94 // from the bam reference table.
95 SamReferenceInfo& refInfo =
96 header.getReferenceInfoForBamInterface();
97 refInfo.clear();
98
99 CharBuffer refName;
100
101 // Read each reference sequence
102 for (int i = 0; i < referenceCount; i++)
103 {
104 int nameLength;
105 int rc;
106 // Read the length of the reference name.
107 rc = ifread(filePtr, &nameLength, sizeof(int));
108 if(rc != sizeof(int))
109 {
111 "Failed to read the BAM reference dictionary.");
112 return(false);
113 }
114
115 // Read the name.
116 refName.readFromFile(filePtr, nameLength);
117
118 // Read the length of the reference sequence.
119 int32_t refLen;
120 rc = ifread(filePtr, &refLen, sizeof(int));
121
122 if(rc != sizeof(int)) {
124 "Failed to read the BAM reference dictionary.");
125 return(false);
126 }
127
128 refInfo.add(refName.c_str(), refLen);
129 }
130
131 // Successfully read the file.
132 return(true);
133}
134
135
136bool BamInterface::writeHeader(IFILE filePtr, SamFileHeader& header,
137 SamStatus& status)
138{
139 if((filePtr == NULL) || (filePtr->isOpen() == false))
140 {
141 // File is not open, return false.
143 "Cannot write header since the file pointer is null");
144 return(false);
145 }
146
147 char magic[4];
148 magic[0] = 'B';
149 magic[1] = 'A';
150 magic[2] = 'M';
151 magic[3] = 1;
152
153 // Write magic to the file.
154 ifwrite(filePtr, magic, 4);
155
156 ////////////////////////////////
157 // Write the header to the file.
158 ////////////////////////////////
159 // Construct a string containing the entire header.
160 std::string headerString = "";
161 header.getHeaderString(headerString);
162
163 int32_t headerLen = headerString.length();
164 int numWrite = 0;
165
166 // Write the header length.
167 numWrite = ifwrite(filePtr, &headerLen, sizeof(int32_t));
168 if(numWrite != sizeof(int32_t))
169 {
171 "Failed to write the BAM header length.");
172 return(false);
173 }
174
175 // Write the header to the file.
176 numWrite = ifwrite(filePtr, headerString.c_str(), headerLen);
177 if(numWrite != headerLen)
178 {
180 "Failed to write the BAM header.");
181 return(false);
182 }
183
184 ////////////////////////////////////////////////////////
185 // Write the Reference Information.
186 const SamReferenceInfo& refInfo = header.getReferenceInfo();
187
188 // Get the number of sequences.
189 int32_t numSeq = refInfo.getNumEntries();
190 ifwrite(filePtr, &numSeq, sizeof(int32_t));
191
192 // Write each reference sequence
193 for (int i = 0; i < numSeq; i++)
194 {
195 const char* refName = refInfo.getReferenceName(i);
196 // Add one for the null value.
197 int32_t nameLength = strlen(refName) + 1;
198 // Write the length of the reference name.
199 ifwrite(filePtr, &nameLength, sizeof(int32_t));
200
201 // Write the name.
202 ifwrite(filePtr, refName, nameLength);
203 // Write the length of the reference sequence.
204 int32_t refLen = refInfo.getReferenceLength(i);
205 ifwrite(filePtr, &refLen, sizeof(int32_t));
206 }
207
208 return(true);
209}
210
211
212void BamInterface::readRecord(IFILE filePtr, SamFileHeader& header,
213 SamRecord& record,
214 SamStatus& samStatus)
215{
216 // TODO - need to validate there are @SQ lines in both sam/bam - MAYBE!
217
218 // SetBufferFromFile will reset the record prior to reading a new one.
219 if(record.setBufferFromFile(filePtr, header) != SamStatus::SUCCESS)
220 {
221 // Failed, so add the error message.
222 samStatus.addError(record.getStatus());
223 }
224}
225
226SamStatus::Status BamInterface::writeRecord(IFILE filePtr,
227 SamFileHeader& header,
228 SamRecord& record,
230{
231 // Write the file, returning the status.
232 return(record.writeRecordBuffer(filePtr, translation));
233}
234
235
unsigned int ifread(IFILE file, void *buffer, unsigned int size)
Read up to size bytes from the file into the buffer.
Definition InputFile.h:600
unsigned int ifwrite(IFILE file, const void *buffer, unsigned int size)
Write the specified number of bytes from the specified buffer into the file.
Definition InputFile.h:669
Class for easily reading/writing files without having to worry about file type (uncompressed, gzip, bgzf) when reading.
Definition InputFile.h:37
bool isOpen() const
Returns whether or not the file was successfully opened.
Definition InputFile.h:423
This class allows a user to get/set the fields in a SAM/BAM Header.
const char * getErrorMessage()
Get the failure message if a method returned failure.
const SamReferenceInfo & getReferenceInfo() const
Get the Reference Information.
bool getHeaderString(std::string &header) const
Set the passed in string to the entire header string, clearing its current contents.
void resetHeader()
Initialize the header.
bool addHeader(const char *header)
Add a header that is already preformatted in a const char*.
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record.
Definition SamRecord.h:52
SequenceTranslation
Enum containing the settings on how to translate the sequence if a reference is available.
Definition SamRecord.h:57
SamStatus::Status writeRecordBuffer(IFILE filePtr)
Write the record as a BAM into the specified already opened file.
SamStatus::Status setBufferFromFile(IFILE filePtr, SamFileHeader &header)
Read the BAM record from a file.
const SamStatus & getStatus()
Returns the status associated with the last method that sets the status.
Class for tracking the reference information mapping between the reference ids and the reference names.
void clear()
Reset this reference info.
int32_t getNumEntries() const
Get the number of entries contained here.
const char * getReferenceName(int index) const
Return the reference name at the specified index, returning "" if the index is out of bounds.
void add(const char *referenceSequenceName, int32_t referenceSequenceLength)
Add reference sequence name and reference sequence length.
int32_t getReferenceLength(int index) const
Return the reference length at the specified index, returning 0 if the index is out of bounds.
This class is used to track the status results of some methods in the BAM classes.
Status
Return value enum for StatGenFile methods.
@ SUCCESS
method completed successfully.
@ FAIL_IO
method failed due to an I/O issue.
@ FAIL_PARSE
failed to parse a record/header - invalid format.
@ FAIL_ORDER
FAIL_ORDER: method failed because it was called out of order, like trying to read a file without opening it for read or trying to read a record before the header.
void setStatus(Status newStatus, const char *newMessage)
Set the status with the specified status enum and message.
void addError(Status newStatus, const char *newMessage)
Add the specified error message to the status message, setting the status to newStatus if the current status is SUCCESS.