From 67e57c5aa58f667dc96dfee34ec4859af175b7a9 Mon Sep 17 00:00:00 2001 From: Stefan Huber Date: Thu, 9 Jan 2014 20:42:32 +0100 Subject: [PATCH] Add boolean_vector classes --- CMakeLists.txt | 2 +- include/libstick-0.1/booleanvector.h | 394 +++++++++++++++++++++++++++ tests/booleanvector.h | 99 +++++++ tests/main.cc | 2 + 4 files changed, 496 insertions(+), 1 deletion(-) create mode 100644 include/libstick-0.1/booleanvector.h create mode 100644 tests/booleanvector.h diff --git a/CMakeLists.txt b/CMakeLists.txt index d4bde32..ad43d56 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,7 +21,7 @@ set(CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG") if(CMAKE_BUILD_TYPE STREQUAL "Release") # CMAKE_CXX_FLAGS_RELEASE is appended, but we need to prepend -O2 to take # effect - set(CMAKE_CXX_FLAGS "-O2 ${CMAKE_CXX_FLAGS}") + set(CMAKE_CXX_FLAGS "-O2 -g ${CMAKE_CXX_FLAGS}") endif() set(PACKAGE_STRING libstick) diff --git a/include/libstick-0.1/booleanvector.h b/include/libstick-0.1/booleanvector.h new file mode 100644 index 0000000..5bfd7e3 --- /dev/null +++ b/include/libstick-0.1/booleanvector.h @@ -0,0 +1,394 @@ +#ifndef booleanvector_h_yeephauChuengiil +#define booleanvector_h_yeephauChuengiil + +#include +#include +#include +#include + + +namespace libstick { + + +/** Print this vector */ +template +void _print_container(std::ostream &os, Iterator begin, Iterator end) { + os << "["; + while (begin != end) { + os << *begin; + if (begin+1 != end) + os << " "; + ++begin; + } + os << "]"; +} + +/** A sparse boolean vector. It manages a std::vector containing the sorted + * indices of all ones, i.e. back() gives the index of the final one. Indices + * are of type IT. */ +template +class sorted_boolean_vector { + public: + typedef IT index_type; + typedef std::vector indexarray; + + void clear() { + array.clear(); + } + + /** Get value at given index. That is, return true/false if idx exists + * in index-array.*/ + bool get(index_type idx) const { + return binary_search(array.begin(), array.end(), idx); + } + + /** Return a sorted list of indices of ones.*/ + const indexarray& get_ones() const { + return array; + } + + /** Two vectors are equal if they contain the same indices. */ + bool operator==(const sorted_boolean_vector &vec) const { + return get_ones() == vec.get_ones(); + } + + /** Set value at given index. That is, add/remove idx to index-array if + * value is true/false. */ + void set(index_type idx, bool value) { + // Get the position where to insert/remove + typename indexarray::iterator it = lower_bound(array.begin(), array.end(), idx); + bool exists = (it != array.end() && *it == idx); + assert(get(idx) == exists); + + // Add 'idx' in sorted array + if (value && !exists) + array.insert(it, idx); + // Remove 'idx' + if (!value && exists) + array.erase(it); + +#ifndef NDEBUG + assert(get(idx) == value); + is_sorted(); +#endif + } + + /** Add another vector to us by means of XOR */ + void add(const sorted_boolean_vector &vec) { +#ifndef NDEBUG + is_sorted(); +#endif + + // Make target array large enough + const size_t maxsize = array.size() + vec.array.size(); + if (tmp.size() < maxsize) + tmp.resize(maxsize); + + // Compute symmetric difference + typename indexarray::iterator it = std::set_symmetric_difference( + array.begin(), array.end(), vec.array.begin(), vec.array.end(), tmp.begin()); + + // Copy back to the original vector + array.resize(it - tmp.begin()); + std::copy(tmp.begin(), it, array.begin()); + } + + /** Remove one with largest index. */ + void pop_back() { + array.pop_back(); + } + + /** Get largest index among all ones. */ + index_type back() const { + return array.back(); + } + + /** Returns true if vector contains no 1. */ + bool isempty() const { + return array.size() == 0; + } + + /** Print this vector */ + void print(std::ostream &os) const { + _print_container(os, array.begin(), array.end()); + } + + private: + bool is_sorted() const { + // C++11 would have is_sorted + for (unsigned i=1; i < array.size(); ++i) + if(array[i-1] > array[i]) + return false; + return true; + } + + private: + /** Heap or sorted array of 1's indices. */ + indexarray array; + + /** To reduce memory allocation and improve performance, here is a + * static temporary array. */ + static indexarray tmp; +}; + +template +typename sorted_boolean_vector::indexarray sorted_boolean_vector::tmp; + +template +std::ostream& operator<<(std::ostream &os, const sorted_boolean_vector &v) { + v.print(os); + return os; +} + + +/** Like sorted_boolean_vector. Depending on the access-pattern, it switches + * between a max-heap and sorted array structure. */ +template +class heapsorted_boolean_vector { + public: + typedef IT index_type; + typedef std::vector indexarray; + + heapsorted_boolean_vector() { + clear(); + } + + void clear() { + isheap = false; + heapsize = 0; + array.clear(); + } + + /** Get value at given index. That is, return true/false if idx exists + * in index-array. This function converts representation to a sorted + * array. */ + bool get(index_type idx) const { + deheapify(); + return binary_search(array.begin(), array.end(), idx); + } + + /** Return a sorted list of indices of ones. */ + const indexarray& get_ones() const { + deheapify(); + return array; + } + + /** Two vectors are equal if they contain the same indices. */ + bool operator==(const heapsorted_boolean_vector &vec) const { + return get_ones() == vec.get_ones(); + } + + /** Set value at given index. That is, add/remove idx to index-array if + * value is true/false. This function converts representation to a + * sorted array.*/ + void set(index_type idx, bool value) { + deheapify(); + + // Get the position where to insert/remove + typename indexarray::iterator it = lower_bound(array.begin(), array.end(), idx); + bool exists = (it != array.end() && *it == idx); + assert(get(idx) == exists); + + // Add 'idx' in sorted array + if (value && !exists) + array.insert(it, idx); + // Remove 'idx' + if (!value && exists) + array.erase(it); + +#ifndef NDEBUG + assert(get(idx) == value); + is_sorted(); +#endif + } + + /** Add another vector to us by means of XOR. This function converts + * the representation to a heap. */ + void add(const heapsorted_boolean_vector &vec) { + // Make space for the combined stuff + size_t size1 = get_indices_count(); + size_t size2 = vec.get_indices_count(); + + // We will become a heap! + isheap = true; + heapsize = size1 + size2; + array.resize(heapsize); + + // Append elements of vec to our own array + std::copy(vec.array.begin(), vec.array.begin() + size2, array.begin() + size1); + // Make a heap out of it + std::make_heap(array.begin(), array.begin() + heapsize); + } + + /** Remove one with largest index. */ + void pop_back() { + assert(!isempty()); + strip_heap(); + + if (isheap) + pop_heap(); + else + array.pop_back(); + } + + /** Get largest index among all ones. */ + index_type back() const { + assert(!isempty()); + strip_heap(); + + return isheap ? array[0] : array.back(); + } + + /** Returns true if vector contains no 1. */ + bool isempty() const { + strip_heap(); + return isheap ? heapsize==0 : array.size()==0; + } + + /** Print this vector */ + void print(std::ostream &os) const { + if (isheap) { + os << "heap:"; + _print_container(os, array.begin(), array.begin() + heapsize); + } else { + os << "sorted:"; + _print_container(os, array.begin(), array.end()); + } + } + + private: + /** Pop an element from the heap */ + void pop_heap() { + assert(isheap); + assert(heapsize > 0); + + if (heapsize > 0) { + std::pop_heap(array.begin(), array.begin() + heapsize); + --heapsize; + } + + assert(is_heap()); + } + + /** If heap representation is used then repeatedly pop top two + * elements if they are equal. */ + void strip_heap() const { + if (!isheap) + return; + + heapsorted_boolean_vector *th = const_cast*>(this); + + // Get rid of top two elements + while (true) { + // Top two elements of heap are equal + if ((heapsize >= 2 && array[0] == array[1]) || + (heapsize >= 3 && array[0] == array[2])) { +#ifndef NDEBUG + index_type t = back(); +#endif + th->pop_heap(); + assert(t == back()); + th->pop_heap(); + } + else + break; + } + + assert(is_heap()); + } + + /** Get the number of indices saved. If heap representation is + * used, indices may be saved multiple times. */ + size_t get_indices_count() const { + if (isheap) + return heapsize; + else + return array.size(); + } + + /** Convert representation to sorted array, takes O(n log n) time. */ + void deheapify() const { + if (!isheap) + return; + + heapsorted_boolean_vector *th = const_cast*>(this); + th->isheap = false; + + // Trivial special cases + if (heapsize == 0) { + th->array.clear(); + return; + } + if (heapsize == 1) { + th->array.resize(1); + return; + } + + // Increase temporary array + if (tmp.size() < heapsize) + th->tmp.resize(heapsize); + + // Get a sorted copy of the heap, takes O(n log n) time. + std::copy(array.begin(), array.begin() + heapsize, th->tmp.begin()); + std::sort_heap(th->tmp.begin(), th->tmp.begin() + heapsize); + + // Copy back to array, but get rid of pairs of equal elements, + // takes O(n) time. + th->array.clear(); + for (unsigned i=0; ; ++i) { + // Skip pairs of equal elements + while (i+1 < heapsize && tmp[i] == tmp[i+1]) + i += 2; + + // Reached end of array + if (i >= heapsize) + break; + + th->array.push_back(tmp[i]); + } + + assert(is_sorted()); + } + + bool is_sorted() const { + // C++11 would have is_sorted + for (unsigned i=1; i < array.size(); ++i) + if(array[i-1] > array[i]) + return false; + return true; + } + + bool is_heap() const { + // C++11 would have is_heap + for (unsigned i=1; i < heapsize; ++i) + if(array[i/2] < array[i]) + return false; + return true; + } + + private: + /** Heap or sorted array of 1's indices. */ + indexarray array; + /** If isheap==true, the number of elements in the heap (this->array) */ + size_t heapsize; + /** Is this->array currently a heap, or a sorted array? */ + bool isheap; + + /** To reduce memory allocation and improve performance, here is a + * static temporary array. */ + static indexarray tmp; +}; + +template +typename heapsorted_boolean_vector::indexarray heapsorted_boolean_vector::tmp; + +template +std::ostream& operator<<(std::ostream &os, const heapsorted_boolean_vector &v) { + v.print(os); + return os; +} + + +} + +#endif diff --git a/tests/booleanvector.h b/tests/booleanvector.h new file mode 100644 index 0000000..ebf6b9f --- /dev/null +++ b/tests/booleanvector.h @@ -0,0 +1,99 @@ +#ifndef booleanvector_h_MeiTephaijaequuy +#define booleanvector_h_MeiTephaijaequuy + +#include +#include + +#include + +using namespace libstick; + + +class boolean_vector_TestSuite: public Test::Suite { + public: + boolean_vector_TestSuite() { + TEST_ADD(boolean_vector_TestSuite::test >); + TEST_ADD(boolean_vector_TestSuite::test >); + + //TEST_ADD(boolean_vector_TestSuite::test_performance >); + //TEST_ADD(boolean_vector_TestSuite::test_performance >); + } + + protected: + virtual void setup() { + } + + virtual void tear_down() { + } + + private: + template + void test() { + const int size = 1000; + + T even; + for (int i=1; 2*i <= size; ++i) + even.set(2*i, true); + + // We added size/2 ones, all of them at even indices. + TEST_ASSERT(even.get_ones().size() == size/2); + for (typename T::indexarray::const_iterator it = even.get_ones().begin(); + it != even.get_ones().end(); ++it) + TEST_ASSERT(*it % 2 == 0); + for (int i=1; i <= size; ++i) + TEST_ASSERT(even.get(i) == (i % 2 == 0)); + + // A complete vector + T all; + for (int i=1; i <= size; ++i) + all.set(i, true); + + // A vector with all odd entries + T odd = even; + odd.add(all); + + // Again size/2 entries in odd. + TEST_ASSERT(odd.get_ones().size() == size/2); + // Check whether we really have odd indices only + for (typename T::indexarray::const_iterator it = odd.get_ones().begin(); + it != odd.get_ones().end(); ++it) + TEST_ASSERT(*it % 2 == 1); + for (int i=1; i <= size; ++i) + TEST_ASSERT(odd.get(i) == (i % 2 == 1)); + + + all.pop_back(); + even.pop_back(); + T all2 = odd; + all2.add(even); + TEST_ASSERT(all2 == all); + } + + template + void test_performance() { + T sum; + + const int size = 1000; + const int cnt = 4; + + T summand; + for (int a=0; a < cnt; ++a) { + summand.clear(); + for (int i=0; i < size; ++i) + summand.set(a*(size/2) + i, true); + sum.add(summand); + } + + //for (int i=0; i < size/2; ++i) { + //TEST_ASSERT(sum.get(i) == true); + //TEST_ASSERT(sum.get(i + size/2) == false); + //TEST_ASSERT(sum.get(i + (cnt-1)*(size/2)) == false); + //TEST_ASSERT(sum.get(i + (cnt)*(size/2)) == true); + //} + + //std::cout << std::endl << sum << std::endl; + } +}; + + +#endif diff --git a/tests/main.cc b/tests/main.cc index dcf0abe..60af1de 100644 --- a/tests/main.cc +++ b/tests/main.cc @@ -1,6 +1,7 @@ #include #include +#include "booleanvector.h" #include "booleanmatrix.h" #include "simplicialcomplex.h" #include "persistence.h" @@ -15,6 +16,7 @@ int main(int argc, char* argv[]) { Test::Suite ts; + ts.add(auto_ptr(new boolean_vector_TestSuite)); ts.add(auto_ptr(new boolean_matrix_TestSuite)); ts.add(auto_ptr(new simplicial_complex_TestSuite)); ts.add(auto_ptr(new persistence_TestSuite)); -- 2.39.5