libStatGen Software 1
Loading...
Searching...
No Matches
BamIndexTest.cpp
1/*
2 * Copyright (C) 2010 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include "BamIndex.h"
19#include "TestValidate.h"
20#include "BamIndexTest.h"
21
22#include <assert.h>
23
24void testIndex(BamIndex& bamIndex)
25{
26#ifdef __ZLIB_AVAILABLE__
27 assert(bamIndex.getNumMappedReads(1) == 2);
28 assert(bamIndex.getNumUnMappedReads(1) == 0);
29 assert(bamIndex.getNumMappedReads(0) == 4);
30 assert(bamIndex.getNumUnMappedReads(0) == 1);
31 assert(bamIndex.getNumMappedReads(23) == -1);
32 assert(bamIndex.getNumUnMappedReads(23) == -1);
33 assert(bamIndex.getNumMappedReads(-1) == 0);
34 assert(bamIndex.getNumUnMappedReads(-1) == 2);
35 assert(bamIndex.getNumMappedReads(-2) == -1);
36 assert(bamIndex.getNumUnMappedReads(-2) == -1);
37 assert(bamIndex.getNumMappedReads(22) == 0);
38 assert(bamIndex.getNumUnMappedReads(22) == 0);
39
40 // Get the chunks for reference id 1.
41 Chunk testChunk;
42 SortedChunkList chunkList;
43 assert(bamIndex.getChunksForRegion(1, -1, -1, chunkList) == true);
44 assert(!chunkList.empty());
45 testChunk = chunkList.pop();
46 assert(chunkList.empty());
47 assert(testChunk.chunk_beg == 0x4e7);
48 assert(testChunk.chunk_end == 0x599);
49
50 // Get the chunks for reference id 0.
51 assert(bamIndex.getChunksForRegion(0, -1, -1, chunkList) == true);
52 assert(!chunkList.empty());
53 testChunk = chunkList.pop();
54 assert(chunkList.empty());
55 assert(testChunk.chunk_beg == 0x360);
56 assert(testChunk.chunk_end == 0x4e7);
57
58
59 // Get the chunks for reference id 2.
60 assert(bamIndex.getChunksForRegion(2, -1, -1, chunkList) == true);
61 assert(!chunkList.empty());
62 testChunk = chunkList.pop();
63 assert(chunkList.empty());
64 assert(testChunk.chunk_beg == 0x599);
65 assert(testChunk.chunk_end == 0x5ea);
66
67 // Get the chunks for reference id 3.
68 // There isn't one for this ref id, but still successfully read the file,
69 // so it should return true, but the list should be empty.
70 assert(bamIndex.getChunksForRegion(3, -1, -1, chunkList) == true);
71 assert(chunkList.empty());
72
73 // Test reading an indexed bam file.
74 SamFile inFile;
75 assert(inFile.OpenForRead("testFiles/sortedBam.bam"));
77 assert(inFile.ReadBamIndex("testFiles/sortedBam.bam.bai"));
78 SamFileHeader samHeader;
79 assert(inFile.ReadHeader(samHeader));
80 SamRecord samRecord;
81
82 // Test getting num mapped/unmapped reads.
83 assert(inFile.getNumMappedReadsFromIndex(1) == 2);
84 assert(inFile.getNumUnMappedReadsFromIndex(1) == 0);
85 assert(inFile.getNumMappedReadsFromIndex(0) == 4);
86 assert(inFile.getNumUnMappedReadsFromIndex(0) == 1);
87 assert(inFile.getNumMappedReadsFromIndex(23) == -1);
88 assert(inFile.getNumUnMappedReadsFromIndex(23) == -1);
89 assert(inFile.getNumMappedReadsFromIndex(-1) == 0);
90 assert(inFile.getNumUnMappedReadsFromIndex(-1) == 2);
91 assert(inFile.getNumMappedReadsFromIndex(-2) == -1);
92 assert(inFile.getNumUnMappedReadsFromIndex(-2) == -1);
93 assert(inFile.getNumMappedReadsFromIndex(22) == 0);
94 assert(inFile.getNumUnMappedReadsFromIndex(22) == 0);
95
96 assert(inFile.getNumMappedReadsFromIndex("2", samHeader) == 2);
97 assert(inFile.getNumUnMappedReadsFromIndex("2", samHeader) == 0);
98 assert(inFile.getNumMappedReadsFromIndex("1", samHeader) == 4);
99 assert(inFile.getNumUnMappedReadsFromIndex("1", samHeader) == 1);
100 assert(inFile.getNumMappedReadsFromIndex("22", samHeader) == 0);
101 assert(inFile.getNumUnMappedReadsFromIndex("22", samHeader) == 0);
102 assert(inFile.getNumMappedReadsFromIndex("", samHeader) == 0);
103 assert(inFile.getNumUnMappedReadsFromIndex("*", samHeader) == 2);
104 assert(inFile.getNumMappedReadsFromIndex("unknown", samHeader) == -1);
105 assert(inFile.getNumUnMappedReadsFromIndex("unknown", samHeader) == -1);
106 assert(inFile.getNumMappedReadsFromIndex("X", samHeader) == 0);
107 assert(inFile.getNumUnMappedReadsFromIndex("X", samHeader) == 0);
108
109 // Set the read section saying the reads must be fully enclosed in the section.
110 assert(inFile.SetReadSection("1", 1010, 1011, false));
111 assert(inFile.ReadRecord(samHeader, samRecord) == false);
112
113 assert(inFile.SetReadSection("1", 1011, 1012, false));
114 assert(inFile.ReadRecord(samHeader, samRecord));
115 validateRead2(samRecord);
116 assert(inFile.ReadRecord(samHeader, samRecord) == false);
117
118 // Section -1 = Ref *: 2 records (8 & 10 from testSam.sam that is reflected
119 // in the validation.
120 assert(inFile.SetReadSection(-1));
121 assert(inFile.ReadRecord(samHeader, samRecord));
122 validateRead8(samRecord);
123 assert(inFile.ReadRecord(samHeader, samRecord));
124 validateRead10(samRecord);
125 assert(inFile.ReadRecord(samHeader, samRecord) == false);
126
127 // Section 2 = Ref 3: 1 records (9 from testSam.sam that is reflected
128 // in the validation.
129 assert(inFile.SetReadSection(2));
130 assert(inFile.ReadRecord(samHeader, samRecord));
131 validateRead9(samRecord);
132 assert(inFile.ReadRecord(samHeader, samRecord) == false);
133
134 // Section 0 = Ref 1: 5 records (3, 4, 1, 2, & 6 from testSam.sam that is
135 // reflected in the validation.
136 assert(inFile.SetReadSection(0));
137 assert(inFile.ReadRecord(samHeader, samRecord));
138 validateRead3(samRecord);
139 assert(inFile.ReadRecord(samHeader, samRecord));
140 validateRead4(samRecord);
141 assert(inFile.ReadRecord(samHeader, samRecord));
142 validateRead1(samRecord);
143 assert(inFile.ReadRecord(samHeader, samRecord));
144 validateRead2(samRecord);
145 assert(inFile.ReadRecord(samHeader, samRecord));
146 validateRead6(samRecord);
147 assert(inFile.ReadRecord(samHeader, samRecord) == false);
148
149 // Section 1 = Ref 2: 2 records (5 & 7 from testSam.sam that is reflected
150 // in the validation.
151 assert(inFile.SetReadSection(1));
152 assert(inFile.ReadRecord(samHeader, samRecord));
153 validateRead5(samRecord);
154 assert(inFile.ReadRecord(samHeader, samRecord));
155 validateRead7(samRecord);
156 assert(inFile.ReadRecord(samHeader, samRecord) == false);
157
158 // Section 3 to 22 (ref 4 - 23): 0 records.
159 for(int i = 3; i < 23; i++)
160 {
161 assert(inFile.SetReadSection(i));
162 assert(inFile.ReadRecord(samHeader, samRecord) == false);
163 }
164
165
166 // Set the read section.
167 assert(inFile.SetReadSection("1", 1010, 1012));
168 assert(inFile.ReadRecord(samHeader, samRecord));
169 validateRead1(samRecord);
170 assert(inFile.GetNumOverlaps(samRecord) == 2);
171 assert(samRecord.getNumOverlaps(1010, 1012) == 2);
172 assert(samRecord.getNumOverlaps(1010, 1020) == 5);
173 assert(samRecord.getNumOverlaps(1010, 1011) == 1);
174 assert(samRecord.getNumOverlaps(1011, 1012) == 1);
175 assert(inFile.ReadRecord(samHeader, samRecord));
176 validateRead2(samRecord);
177 assert(inFile.GetNumOverlaps(samRecord) == 0);
178 assert(samRecord.getNumOverlaps(1010, 1012) == 0);
179 assert(samRecord.getNumOverlaps(1010, 1020) == 0);
180 assert(samRecord.getNumOverlaps(1010, 1011) == 0);
181 assert(samRecord.getNumOverlaps(1011, 1012) == 0);
182 assert(inFile.ReadRecord(samHeader, samRecord) == false);
183
184 assert(inFile.SetReadSection("1", 1010, 1020));
185 assert(inFile.ReadRecord(samHeader, samRecord));
186 validateRead1(samRecord);
187 assert(inFile.GetNumOverlaps(samRecord) == 5);
188 assert(samRecord.getNumOverlaps(1010, 1012) == 2);
189 assert(samRecord.getNumOverlaps(1010, 1020) == 5);
190 assert(samRecord.getNumOverlaps(1010, 1011) == 1);
191 assert(samRecord.getNumOverlaps(1011, 1012) == 1);
192 assert(inFile.ReadRecord(samHeader, samRecord));
193 validateRead2(samRecord);
194 assert(inFile.GetNumOverlaps(samRecord) == 0);
195 assert(samRecord.getNumOverlaps(1010, 1012) == 0);
196 assert(samRecord.getNumOverlaps(1010, 1020) == 0);
197 assert(samRecord.getNumOverlaps(1010, 1011) == 0);
198 assert(samRecord.getNumOverlaps(1011, 1012) == 0);
199 assert(inFile.ReadRecord(samHeader, samRecord) == false);
200
201 assert(inFile.SetReadSection("1", 1010, 1011));
202 assert(inFile.ReadRecord(samHeader, samRecord));
203 validateRead1(samRecord);
204 assert(inFile.GetNumOverlaps(samRecord) == 1);
205 assert(samRecord.getNumOverlaps(1010, 1012) == 2);
206 assert(samRecord.getNumOverlaps(1010, 1020) == 5);
207 assert(samRecord.getNumOverlaps(1010, 1011) == 1);
208 assert(samRecord.getNumOverlaps(1011, 1012) == 1);
209 assert(inFile.ReadRecord(samHeader, samRecord) == false);
210
211 assert(inFile.SetReadSection("1", 1011, 1012));
212 assert(inFile.ReadRecord(samHeader, samRecord));
213 validateRead1(samRecord);
214 assert(inFile.GetNumOverlaps(samRecord) == 1);
215 assert(samRecord.getNumOverlaps(1010, 1012) == 2);
216 assert(samRecord.getNumOverlaps(1010, 1020) == 5);
217 assert(samRecord.getNumOverlaps(1010, 1011) == 1);
218 assert(samRecord.getNumOverlaps(1011, 1012) == 1);
219 assert(inFile.ReadRecord(samHeader, samRecord));
220 validateRead2(samRecord);
221 assert(inFile.GetNumOverlaps(samRecord) == 0);
222 assert(samRecord.getNumOverlaps(1010, 1012) == 0);
223 assert(samRecord.getNumOverlaps(1010, 1020) == 0);
224 assert(samRecord.getNumOverlaps(1010, 1011) == 0);
225 assert(samRecord.getNumOverlaps(1011, 1012) == 0);
226 assert(inFile.ReadRecord(samHeader, samRecord) == false);
227#endif
228}
229
230
231void testBamIndex()
232{
233 // BAM indexes are compressed, so can't be tested without zlib.
234#ifdef __ZLIB_AVAILABLE__
235 // Create a bam index.
236 BamIndex bamIndex;
237 bamIndex.readIndex("testFiles/sortedBam.bam.bai");
238 testIndex(bamIndex);
239
240 BamIndexFileTest test1;
241 bool caughtException = false;
242 try
243 {
244 // Try reading the bam index without specifying a
245 // filename and before opening a bam file.
246 assert(test1.ReadBamIndex() == false);
247 }
248 catch (std::exception& e)
249 {
250 caughtException = true;
251 assert(strcmp(e.what(), "FAIL_ORDER: Failed to read the bam Index file - the BAM file needs to be read first in order to determine the index filename.") == 0);
252 }
253 // Should have failed and thrown an exception.
254 assert(caughtException);
255
256 // Read the bam index with a specified name.
257 assert(test1.ReadBamIndex("testFiles/sortedBam.bam.bai"));
258 BamIndex* index = test1.getBamIndex();
259 assert(index != NULL);
260 testIndex(*index);
261
262 // Open the bam file so the index can be opened.
263 assert(test1.OpenForRead("testFiles/sortedBam.bam"));
264 // Try reading the bam index without specifying a
265 // filename after opening a bam file.
266 assert(test1.ReadBamIndex() == true);
267 index = test1.getBamIndex();
268 assert(index != NULL);
269 testIndex(*index);
270
271 // Open the bam file so the index can be opened.
272 // This time the index file does not have .bam in it.
273 assert(test1.OpenForRead("testFiles/sortedBam2.bam"));
274 // Try reading the bam index without specifying a
275 // filename after opening a bam file.
276 assert(test1.ReadBamIndex() == true);
277 index = test1.getBamIndex();
278 assert(index != NULL);
279 testIndex(*index);
280#endif
281}
282
283
bool getChunksForRegion(int32_t refID, int32_t start, int32_t end, SortedChunkList &chunkList)
Get the list of chunks associated with this region.
Definition BamIndex.cpp:218
int32_t getNumMappedReads(int32_t refID)
Get the number of mapped reads for this reference id.
Definition BamIndex.cpp:355
SamStatus::Status readIndex(const char *filename)
Definition BamIndex.cpp:45
int32_t getNumUnMappedReads(int32_t refID)
Get the number of unmapped reads for this reference id.
Definition BamIndex.cpp:377
This class allows a user to get/set the fields in a SAM/BAM Header.
Allows the user to easily read/write a SAM/BAM file.
Definition SamFile.h:36
bool ReadHeader(SamFileHeader &header)
Reads the header section from the file and stores it in the passed in header.
Definition SamFile.cpp:450
int32_t getNumUnMappedReadsFromIndex(int32_t refID)
Get the number of unmapped reads in the specified reference id.
Definition SamFile.cpp:818
bool SetReadSection(int32_t refID)
Sets which reference id (index into the BAM list of reference information) of the BAM file should be ...
Definition SamFile.cpp:696
@ COORDINATE
file is sorted by coordinate.
Definition SamFile.h:49
bool ReadRecord(SamFileHeader &header, SamRecord &record)
Reads the next record from the file & stores it in the passed in record.
Definition SamFile.cpp:514
uint32_t GetNumOverlaps(SamRecord &samRecord)
Returns the number of bases in the passed in read that overlap the region that is currently set.
Definition SamFile.cpp:877
bool OpenForRead(const char *filename, SamFileHeader *header=NULL)
Open a sam/bam file for reading with the specified filename, determing the type of file and SAM/BAM b...
Definition SamFile.cpp:93
int32_t getNumMappedReadsFromIndex(int32_t refID)
Get the number of mapped reads in the specified reference id.
Definition SamFile.cpp:803
bool ReadBamIndex(const char *filename)
Read the specified bam index file.
Definition SamFile.cpp:300
void setSortedValidation(SortedType sortType)
Set the flag to validate that the file is sorted as it is read/written.
Definition SamFile.cpp:682
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record.
Definition SamRecord.h:52
uint32_t getNumOverlaps(int32_t start, int32_t end)
Return the number of bases in this read that overlap the passed in region.