Use more efficient colmatrix-based reduction
[libstick.git] / include / libstick-0.1 / persistence.h
index 937a4cc2378775dfacc18ceb0c3112d4c62eb68a..6df70abc7c8345b0f11a86d168fdf3bcd9f4f0f2 100644 (file)
@@ -4,6 +4,7 @@
 #include <iostream>
 #include <ostream>
 #include <vector>
+#include <cassert>
 
 #include "simplicialcomplex.h"
 #include "booleanmatrix.h"
 namespace libstick {
 
 
-template<class T>
-std::ostream& operator<<(std::ostream &, const std::vector<T> &);
-
-
 /** Persistence computers various persistent homology information on a
  * simplex_order of a simplicial_complex. */
 template<int MAXDIM, class IT, class VT>
 class persistence {
 
     public:
-        typedef typename simplicial_complex<MAXDIM, IT, VT>::simplex_order simplex_order;
-        typedef typename simplex_order::boundary_matrix boundary_matrix;
+        typedef simplicial_complex<MAXDIM, IT, VT> scomplex;
+        typedef typename scomplex::simplex_order simplex_order;
+        typedef boolean_colmatrix<IT> boundary_matrix;
         typedef boolean_colmatrix<IT> transformation_matrix;
+        typedef std::vector<IT> lowestones_matrix;
 
-        /** A point in a diagram comprises birth- and death-index. These are the
-         * indices of the simplices of 'order' that give birth and death to the
-         * class. */
+        /** A point in a persistence diagram comprises birth- and death-index.
+         * These are the indices of the simplices of 'order' that give birth
+         * and death to the class. */
         struct diagram_point {
+            /** 'birth'-th simplex in 'order' gave birth to this cycle. */
             IT birth;
+            /** 'death'-th simplex in 'order' killed this cycle. If 'death' is
+             * zero, the cycle never dies. Otherwise, 'death' is always larger
+             * than 'birth'. */
             IT death;
         };
 
+        /** Save the births and deaths of simplices of a fixed dimension, say p. */
+        struct diagram {
+            /** Saves the sequence of birth/death-points of p-simplices, sorted
+             * by birth-index. If death-index is zero, the corresponding
+             * homology class never dies. */
+            std::vector<diagram_point> births;
+
+
+            /** Gives the p-th Betti number, i.e., the number of immortal
+             * p-dimensional homology classes. */
+            size_t betti() const {
+                size_t b = 0;
+
+                for (unsigned i=0; i < births.size(); ++i)
+                    if (births[i].death == 0)
+                        b++;
+
+                return b;
+            }
+
+            /** Gives the number of p-dimensional homology classes that are
+             * born by simplex 'from' or earlier and die after simplex 'to' is
+             * inserted. */
+            size_t persistent_betti(IT from, IT to) const {
+                assert(from <= to);
+                size_t b = 0;
+
+                for (unsigned i=0; i < births.size(); ++i) {
+                    // All following simplices are born too late.
+                    if (births[i].birth > from)
+                        break;
+                    if (births[i].death > to || births[i].death == 0)
+                        b++;
+                }
+
+                return b;
+            }
+        };
 
     public:
         /** Create a new peristence object on the given simplex_order */
@@ -41,14 +82,16 @@ class persistence {
             order(order),
             bm(0),
             rbm(0),
+            lowestones(0),
             tm(0)
-        {
+            {
             reset();
         }
 
         /** Reset all results gained from 'order'. */
         void reset() {
             done_matrices = false;
+            done_diagrams = false;
         }
 
         /** Get boundary matrix 'bm' of 'order', compute reduces boundary
@@ -61,37 +104,74 @@ class persistence {
 
             bm = order.get_boundary_matrix();
             rbm = bm;
-            tm = create_unit_matrix<transformation_matrix>(bm.size());
-            reduce_boundary_matrix(rbm, tm);
+            tm = create_unit_matrix<transformation_matrix>(bm.width());
+            lowestones = lowestones_matrix(bm.width());
+
+            // Make every column reduced, i.e., it is a zero-column or contains only one 1.
+            for (unsigned c=0; c < rbm.width(); ++c) {
+                //if (c % 100 == 0)
+                //std::cout << "c = " << c << " (" << (100.0*float(c)/rbm.width()) << " %)" << std::endl;
+                // Reduce as long as we need to reduce
+                while (rbm.get_column(c).size() > 0) {
+                    // (r, c) is the lowest one of col
+                    const IT r = rbm.get_column(c).back();
+                    // This column contains its lowest one on the same row
+                    const IT c2 = lowestones[r];
+                    assert(c2 < c);
+
+                    // There is no valid c2, hence we have completely reduced
+                    if (c2 == 0)
+                        break;
+                    // Reduce col by column c2
+                    else {
+                        assert(rbm.get(r, c));
+                        rbm.add_column(c, rbm.get_column(c2));
+                        tm.add_column(c, tm.get_column(c2));
+                        assert(!rbm.get(r, c));
+                    }
+                }
 
-            assert(rbm == bm * tm);
+                // A lowest one remained, recall it
+                if (rbm.get_column(c).size() > 0) {
+                    const IT r = rbm.get_column(c).back();
+                    assert(lowestones[r] == 0);
+                    assert(r < c);
+                    lowestones[r] = c;
+                }
+            }
+        }
+
+        /** Get the lowest-one matrix of 'order'. */
+        const lowestones_matrix& get_lowestones_matrix() const {
+            assert(done_matrices);
+            return lowestones;
         }
 
         /** Get the boundary matrix of 'order'. */
-        const boundary_matrix& get_boundary_matrix() {
-            compute_matrices();
+        const boundary_matrix& get_boundary_matrix() const {
+            assert(done_matrices);
             return bm;
         }
 
         /** Get the reduced boundary matrix of 'order'. */
-        const boundary_matrix& get_reduced_boundary_matrix() {
-            compute_matrices();
+        const boundary_matrix& get_reduced_boundary_matrix() const {
+            assert(done_matrices);
             return rbm;
         }
 
         /** Get the transformation matrix of 'order', which transforms the
          * boundary matrix to the reduced boundary matrix, when multiplied from
          * the right. */
-        const transformation_matrix& get_transformation_matrix() {
-            compute_matrices();
+        const transformation_matrix& get_transformation_matrix() const {
+            assert(done_matrices);
             return tm;
         }
 
-
         /** Print the sequence of births and deaths of cycles (homology classes). */
-        void interpret_reduction(std::ostream &os) {
-            compute_matrices();
-            for (unsigned c=0; c < bm.size(); ++c) {
+        void interpret_reduction(std::ostream &os) const {
+            assert(done_matrices);
+
+            for (unsigned c=0; c < bm.width(); ++c) {
                 os << c << ". inserting ";
 
                 switch (bm.get_column(c).size()) {
@@ -118,7 +198,7 @@ class persistence {
                 os << std::endl;
                 os << "  boundary: " << bm.get_column(c) << std::endl;
 
-                typename boolean_colrowmatrix<IT>::colbase::column_type col = rbm.get_column(c);
+                typename boolean_colmatrix<IT>::column_type col = rbm.get_column(c);
                 if (col.size() == 0) {
                     os << "  \033[1;32mbirth\033[0;m of a cycle: " << tm.get_column(c) << std::endl;
                 } else {
@@ -129,16 +209,113 @@ class persistence {
             }
         }
 
+        /** Get the 'dim'-dimensional peristence diagram. */
+        const diagram& get_persistence_diagram(int dim) const{
+            assert(-1 <= dim);
+            assert(dim <= MAXDIM);
+            assert(done_diagrams);
+
+            return diagrams[dim+1];
+        }
+
+        /** Compute persistence diagrams */
+        void compute_diagrams() {
+            if (done_diagrams)
+                return;
+            done_diagrams = true;
+
+            compute_matrices();
+
+#ifndef NDEBUG
+            std::set<IT> deaths, births;
+#endif
+            //Clear all diagrams
+            for (int d=0; d < MAXDIM + 2; ++d)
+                diagrams[d] = diagram();
+
+            for (unsigned c=0; c < lowestones.size(); ++c) {
+                // A cycle was born
+                if (rbm.get_column(c).size() == 0) {
+                    const int dim = order.get_simplex(c).dim;
+
+                    // Create a diagram point
+                    diagram_point p;
+                    p.birth = c;
+                    p.death = 0;
+                    // If the class actually dies, get the index of the killing simplex
+                    if (lowestones[c] > 0)
+                        p.death = lowestones[c];
+
+                    _get_persistence_diagram(dim).births.push_back(p);
+#ifndef NDEBUG
+                    assert(births.count(p.birth) == 0);
+                    assert(p.death == 0 || deaths.count(p.death) == 0);
+                    births.insert(p.birth);
+                    deaths.insert(p.death);
+#endif
+                }
+            }
+#ifndef NDEBUG
+            // Shakespeare principle: A simplex either gives birth or kills, be
+            // or not be, tertium non datur.
+            for (unsigned c=0; c < lowestones.size(); ++c) {
+                // A class died with c
+                if (rbm.get_column(c).size() != 0)
+                    assert(c == 0 || deaths.count(c) > 0);
+                else
+                    assert(births.count(c) > 0);
+            }
+#endif
+        }
+
+        /** Print birth and death information in the persistence diagrams. */
+        void interpret_persistence(std::ostream &os) const {
+            assert(done_diagrams);
+
+            for (int d=-1; d <= MAXDIM; ++d) {
+                os << "Dimension " << d << std::endl;
+                const diagram &dia = get_persistence_diagram(d);
+                for (unsigned i=0; i < dia.births.size(); ++i) {
+                    os << "  ";
+                    if (dia.births[i].death > 0)
+                        os << dia.births[i].birth << "\033[1;31m -> " << dia.births[i].death;
+                    else
+                        os << "\033[1;32m" << dia.births[i].birth;
+                    os << "\033[0;m" << std::endl;
+                }
+            }
+        }
+
     private:
 
+        diagram& _get_persistence_diagram(int dim) {
+            assert(-1 <= dim);
+            assert(dim <= MAXDIM);
+            assert(done_diagrams);
+
+            return diagrams[dim+1];
+        }
+
         /** The underlying simplex order of this diagram. */
         const simplex_order &order;
 
         bool done_matrices;
+        bool done_diagrams;
 
+        /** The boundary matrix of 'order' */
         boundary_matrix bm;
+        /** The reduced boundary matrix of 'order' */
         boundary_matrix rbm;
+        /** Only the lowest ones per column of the reduced boundary matrix.
+         * lowestones[i] contains the column-index at which the lowest one at
+         * row i appears, and zero if no such column exists. */
+        lowestones_matrix lowestones;
+        /** The transformation matrix that transforms bm into rbm, i.e. rbm = bm * tm. */
         transformation_matrix tm;
+
+        /** The container for all p-dimensional diagrams. The p-th diagram is
+         * save at index p+1, with -1 <= p <= MAXDIM. */
+        diagram diagrams[MAXDIM+2];
 };
 
 
@@ -148,51 +325,45 @@ class persistence {
  * invariant.  Hence, the resulting b is equal to the product of the boundary
  * matrix times v. */
 template<class IT, class D>
-void reduce_boundary_matrix(boolean_colrowmatrix<IT> &b, boolean_colmatrix_base<IT, D> &v) {
-    assert(b.size() == v.width());
+void reduce_boundary_matrix(boolean_colmatrix_base<IT, D> &b, boolean_colmatrix_base<IT, D> &v) {
+    assert(b.width() == v.width());
+
+    // lowestones[i] gives the column whose lowest 1 is at row i. If it contains
+    // zero, there is no such column.
+    std::vector<IT> lowestones(b.width());
 
     // Make every column reduced, i.e., it is a zero-column or contains only one 1.
-    for (unsigned c=0; c < b.size(); ++c) {
-        const typename boolean_colrowmatrix<IT>::colbase::column_type &col = b.get_column(c);
-        if (col.size() == 0)
-            continue;
-
-        // The column is not a zero-column, take the lowest 1, and reduce the other columns at the row
-        IT r = col.back();
-        assert(b.get(r, c));
-
-        // Get a copy of the r-th row
-        typename boolean_colrowmatrix<IT>::rowbase::row_type row(b.get_row(r).size());
-        copy(b.get_row(r).begin(), b.get_row(r).end(), row.begin());
-        assert(row.size() >= 1);
-
-        // Get rid of 1s at that row right of column c.
-        typename boolean_colrowmatrix<IT>::row_type::const_iterator it = lower_bound(row.begin(), row.end(), c);
-        for(; it != row.end(); ++it) {
-            if (*it <= c)
-                continue;
-
-            assert(b.get(r, *it));
-            b.add_column(*it, col);
-            v.add_column(*it, v.get_column(c));
-            assert(!b.get(r, *it));
+    for (unsigned c=0; c < b.width(); ++c) {
+        //if (c % 100 == 0)
+            //std::cout << "c = " << c << " (" << (100.0*float(c)/b.width()) << " %)" << std::endl;
+        // Reduce as long as we need to reduce
+        while (b.get_column(c).size() > 0) {
+            // (r, c) is the lowest one of col
+            const IT r = b.get_column(c).back();
+            // This column contains its lowest one on the same row
+            const IT c2 = lowestones[r];
+            assert(c2 < c);
+
+            // There is no valid c2, hence we have completely reduced
+            if (c2 == 0)
+                break;
+            // Reduce col by column c2
+            else {
+                assert(b.get(r, c));
+                b.add_column(c, b.get_column(c2));
+                v.add_column(c, v.get_column(c2));
+                assert(!b.get(r, c));
+            }
         }
-    }
-}
 
-template<class T>
-std::ostream& operator<<(std::ostream& os, const std::vector<T> &vec) {
-    os << "[";
-
-    typename std::vector<T>::const_iterator it = vec.begin();
-    while ( it != vec.end()) {
-        os << *it;
-        if (++it != vec.end())
-            os << " ";
+        // A lowest one remained, recall it
+        if (b.get_column(c).size() > 0) {
+            const IT r = b.get_column(c).back();
+            assert(lowestones[r] == 0);
+            assert(r < c);
+            lowestones[r] = c;
+        }
     }
-
-    os << "]";
-    return os;
 }