Add boolean_vector classes
[libstick.git] / include / libstick-0.1 / booleanvector.h
1 #ifndef booleanvector_h_yeephauChuengiil
2 #define booleanvector_h_yeephauChuengiil
3
4 #include <cassert>
5 #include <vector>
6 #include <algorithm>
7 #include <ostream>
8
9
10 namespace libstick {
11
12
13 /** Print this vector */
14 template<class Iterator>
15 void _print_container(std::ostream &os, Iterator begin, Iterator end) {
16 os << "[";
17 while (begin != end) {
18 os << *begin;
19 if (begin+1 != end)
20 os << " ";
21 ++begin;
22 }
23 os << "]";
24 }
25
26 /** A sparse boolean vector. It manages a std::vector containing the sorted
27 * indices of all ones, i.e. back() gives the index of the final one. Indices
28 * are of type IT. */
29 template<class IT>
30 class sorted_boolean_vector {
31 public:
32 typedef IT index_type;
33 typedef std::vector<index_type> indexarray;
34
35 void clear() {
36 array.clear();
37 }
38
39 /** Get value at given index. That is, return true/false if idx exists
40 * in index-array.*/
41 bool get(index_type idx) const {
42 return binary_search(array.begin(), array.end(), idx);
43 }
44
45 /** Return a sorted list of indices of ones.*/
46 const indexarray& get_ones() const {
47 return array;
48 }
49
50 /** Two vectors are equal if they contain the same indices. */
51 bool operator==(const sorted_boolean_vector<IT> &vec) const {
52 return get_ones() == vec.get_ones();
53 }
54
55 /** Set value at given index. That is, add/remove idx to index-array if
56 * value is true/false. */
57 void set(index_type idx, bool value) {
58 // Get the position where to insert/remove
59 typename indexarray::iterator it = lower_bound(array.begin(), array.end(), idx);
60 bool exists = (it != array.end() && *it == idx);
61 assert(get(idx) == exists);
62
63 // Add 'idx' in sorted array
64 if (value && !exists)
65 array.insert(it, idx);
66 // Remove 'idx'
67 if (!value && exists)
68 array.erase(it);
69
70 #ifndef NDEBUG
71 assert(get(idx) == value);
72 is_sorted();
73 #endif
74 }
75
76 /** Add another vector to us by means of XOR */
77 void add(const sorted_boolean_vector<IT> &vec) {
78 #ifndef NDEBUG
79 is_sorted();
80 #endif
81
82 // Make target array large enough
83 const size_t maxsize = array.size() + vec.array.size();
84 if (tmp.size() < maxsize)
85 tmp.resize(maxsize);
86
87 // Compute symmetric difference
88 typename indexarray::iterator it = std::set_symmetric_difference(
89 array.begin(), array.end(), vec.array.begin(), vec.array.end(), tmp.begin());
90
91 // Copy back to the original vector
92 array.resize(it - tmp.begin());
93 std::copy(tmp.begin(), it, array.begin());
94 }
95
96 /** Remove one with largest index. */
97 void pop_back() {
98 array.pop_back();
99 }
100
101 /** Get largest index among all ones. */
102 index_type back() const {
103 return array.back();
104 }
105
106 /** Returns true if vector contains no 1. */
107 bool isempty() const {
108 return array.size() == 0;
109 }
110
111 /** Print this vector */
112 void print(std::ostream &os) const {
113 _print_container(os, array.begin(), array.end());
114 }
115
116 private:
117 bool is_sorted() const {
118 // C++11 would have is_sorted
119 for (unsigned i=1; i < array.size(); ++i)
120 if(array[i-1] > array[i])
121 return false;
122 return true;
123 }
124
125 private:
126 /** Heap or sorted array of 1's indices. */
127 indexarray array;
128
129 /** To reduce memory allocation and improve performance, here is a
130 * static temporary array. */
131 static indexarray tmp;
132 };
133
134 template<class IT>
135 typename sorted_boolean_vector<IT>::indexarray sorted_boolean_vector<IT>::tmp;
136
137 template<class IT>
138 std::ostream& operator<<(std::ostream &os, const sorted_boolean_vector<IT> &v) {
139 v.print(os);
140 return os;
141 }
142
143
144 /** Like sorted_boolean_vector. Depending on the access-pattern, it switches
145 * between a max-heap and sorted array structure. */
146 template<class IT>
147 class heapsorted_boolean_vector {
148 public:
149 typedef IT index_type;
150 typedef std::vector<index_type> indexarray;
151
152 heapsorted_boolean_vector() {
153 clear();
154 }
155
156 void clear() {
157 isheap = false;
158 heapsize = 0;
159 array.clear();
160 }
161
162 /** Get value at given index. That is, return true/false if idx exists
163 * in index-array. This function converts representation to a sorted
164 * array. */
165 bool get(index_type idx) const {
166 deheapify();
167 return binary_search(array.begin(), array.end(), idx);
168 }
169
170 /** Return a sorted list of indices of ones. */
171 const indexarray& get_ones() const {
172 deheapify();
173 return array;
174 }
175
176 /** Two vectors are equal if they contain the same indices. */
177 bool operator==(const heapsorted_boolean_vector<IT> &vec) const {
178 return get_ones() == vec.get_ones();
179 }
180
181 /** Set value at given index. That is, add/remove idx to index-array if
182 * value is true/false. This function converts representation to a
183 * sorted array.*/
184 void set(index_type idx, bool value) {
185 deheapify();
186
187 // Get the position where to insert/remove
188 typename indexarray::iterator it = lower_bound(array.begin(), array.end(), idx);
189 bool exists = (it != array.end() && *it == idx);
190 assert(get(idx) == exists);
191
192 // Add 'idx' in sorted array
193 if (value && !exists)
194 array.insert(it, idx);
195 // Remove 'idx'
196 if (!value && exists)
197 array.erase(it);
198
199 #ifndef NDEBUG
200 assert(get(idx) == value);
201 is_sorted();
202 #endif
203 }
204
205 /** Add another vector to us by means of XOR. This function converts
206 * the representation to a heap. */
207 void add(const heapsorted_boolean_vector<IT> &vec) {
208 // Make space for the combined stuff
209 size_t size1 = get_indices_count();
210 size_t size2 = vec.get_indices_count();
211
212 // We will become a heap!
213 isheap = true;
214 heapsize = size1 + size2;
215 array.resize(heapsize);
216
217 // Append elements of vec to our own array
218 std::copy(vec.array.begin(), vec.array.begin() + size2, array.begin() + size1);
219 // Make a heap out of it
220 std::make_heap(array.begin(), array.begin() + heapsize);
221 }
222
223 /** Remove one with largest index. */
224 void pop_back() {
225 assert(!isempty());
226 strip_heap();
227
228 if (isheap)
229 pop_heap();
230 else
231 array.pop_back();
232 }
233
234 /** Get largest index among all ones. */
235 index_type back() const {
236 assert(!isempty());
237 strip_heap();
238
239 return isheap ? array[0] : array.back();
240 }
241
242 /** Returns true if vector contains no 1. */
243 bool isempty() const {
244 strip_heap();
245 return isheap ? heapsize==0 : array.size()==0;
246 }
247
248 /** Print this vector */
249 void print(std::ostream &os) const {
250 if (isheap) {
251 os << "heap:";
252 _print_container(os, array.begin(), array.begin() + heapsize);
253 } else {
254 os << "sorted:";
255 _print_container(os, array.begin(), array.end());
256 }
257 }
258
259 private:
260 /** Pop an element from the heap */
261 void pop_heap() {
262 assert(isheap);
263 assert(heapsize > 0);
264
265 if (heapsize > 0) {
266 std::pop_heap(array.begin(), array.begin() + heapsize);
267 --heapsize;
268 }
269
270 assert(is_heap());
271 }
272
273 /** If heap representation is used then repeatedly pop top two
274 * elements if they are equal. */
275 void strip_heap() const {
276 if (!isheap)
277 return;
278
279 heapsorted_boolean_vector<IT> *th = const_cast<heapsorted_boolean_vector<IT>*>(this);
280
281 // Get rid of top two elements
282 while (true) {
283 // Top two elements of heap are equal
284 if ((heapsize >= 2 && array[0] == array[1]) ||
285 (heapsize >= 3 && array[0] == array[2])) {
286 #ifndef NDEBUG
287 index_type t = back();
288 #endif
289 th->pop_heap();
290 assert(t == back());
291 th->pop_heap();
292 }
293 else
294 break;
295 }
296
297 assert(is_heap());
298 }
299
300 /** Get the number of indices saved. If heap representation is
301 * used, indices may be saved multiple times. */
302 size_t get_indices_count() const {
303 if (isheap)
304 return heapsize;
305 else
306 return array.size();
307 }
308
309 /** Convert representation to sorted array, takes O(n log n) time. */
310 void deheapify() const {
311 if (!isheap)
312 return;
313
314 heapsorted_boolean_vector<IT> *th = const_cast<heapsorted_boolean_vector<IT>*>(this);
315 th->isheap = false;
316
317 // Trivial special cases
318 if (heapsize == 0) {
319 th->array.clear();
320 return;
321 }
322 if (heapsize == 1) {
323 th->array.resize(1);
324 return;
325 }
326
327 // Increase temporary array
328 if (tmp.size() < heapsize)
329 th->tmp.resize(heapsize);
330
331 // Get a sorted copy of the heap, takes O(n log n) time.
332 std::copy(array.begin(), array.begin() + heapsize, th->tmp.begin());
333 std::sort_heap(th->tmp.begin(), th->tmp.begin() + heapsize);
334
335 // Copy back to array, but get rid of pairs of equal elements,
336 // takes O(n) time.
337 th->array.clear();
338 for (unsigned i=0; ; ++i) {
339 // Skip pairs of equal elements
340 while (i+1 < heapsize && tmp[i] == tmp[i+1])
341 i += 2;
342
343 // Reached end of array
344 if (i >= heapsize)
345 break;
346
347 th->array.push_back(tmp[i]);
348 }
349
350 assert(is_sorted());
351 }
352
353 bool is_sorted() const {
354 // C++11 would have is_sorted
355 for (unsigned i=1; i < array.size(); ++i)
356 if(array[i-1] > array[i])
357 return false;
358 return true;
359 }
360
361 bool is_heap() const {
362 // C++11 would have is_heap
363 for (unsigned i=1; i < heapsize; ++i)
364 if(array[i/2] < array[i])
365 return false;
366 return true;
367 }
368
369 private:
370 /** Heap or sorted array of 1's indices. */
371 indexarray array;
372 /** If isheap==true, the number of elements in the heap (this->array) */
373 size_t heapsize;
374 /** Is this->array currently a heap, or a sorted array? */
375 bool isheap;
376
377 /** To reduce memory allocation and improve performance, here is a
378 * static temporary array. */
379 static indexarray tmp;
380 };
381
382 template<class IT>
383 typename heapsorted_boolean_vector<IT>::indexarray heapsorted_boolean_vector<IT>::tmp;
384
385 template<class IT>
386 std::ostream& operator<<(std::ostream &os, const heapsorted_boolean_vector<IT> &v) {
387 v.print(os);
388 return os;
389 }
390
391
392 }
393
394 #endif