Added toString(const T&), cleaned up utils/uai2fg and added tests/twofactors.fg
authorJoris Mooij <joris.mooij@tuebingen.mpg.de>
Sun, 25 Apr 2010 13:43:05 +0000 (15:43 +0200)
committerJoris Mooij <joris.mooij@tuebingen.mpg.de>
Sun, 25 Apr 2010 13:43:05 +0000 (15:43 +0200)
ChangeLog
include/dai/util.h
tests/twofactors.fg [new file with mode: 0644]
utils/uai2fg.cpp

index eb0b8ed..a2a0411 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -119,6 +119,7 @@ git master HEAD
   - Added PropertySet::erase()
   - Fixed bug in PropertySet::setAsString<T>()
 * Improved util.h/cpp:
+  - Added std::string toString( const T& x )
   - Fixed a bug in rnd_seed()
   - Removed max( const std::vector<Real> &v )
 * Improved weightedgraph.h/cpp:
index 96e7887..48858c7 100644 (file)
@@ -24,6 +24,7 @@
 #include <iostream>
 #include <boost/foreach.hpp>
 #include <boost/functional/hash.hpp>
+#include <boost/lexical_cast.hpp>
 #include <algorithm>
 #include <cerrno>
 
@@ -150,6 +151,13 @@ inline int rnd( int n ) {
 }
 
 
+/// Converts a variable of type \a T to a \c std::string by using a \c std::stringstream
+template<class T>
+std::string toString( const T& x ) {
+    return boost::lexical_cast<std::string>(x);
+}
+
+
 /// Writes a \c std::vector<> to a \c std::ostream
 template<class T>
 std::ostream& operator << (std::ostream& os, const std::vector<T> & x) {
diff --git a/tests/twofactors.fg b/tests/twofactors.fg
new file mode 100644 (file)
index 0000000..62136c0
--- /dev/null
@@ -0,0 +1,19 @@
+2
+
+2
+0 1 
+2 2 
+4
+0   0.570273
+1   0.702232
+2   0.774333
+3    3.41627
+
+2
+2 3
+2 2 
+4
+0    1.51657
+1   0.920425
+2   0.254792
+3    1.50469
index 6c2347b..47bc025 100644 (file)
@@ -20,24 +20,33 @@ using namespace std;
 using namespace dai;
 
 
-map<size_t, size_t> ReadUAIEvidenceFile( char *filename ) {
+/// Reads "evidence" (a mapping from observed variable labels to the observed values) from a UAI evidence file
+map<size_t, size_t> ReadUAIEvidenceFile( char* filename ) {
     map<size_t, size_t> evid;
 
+    // open file
     ifstream is;
     is.open( filename );
     if( is.is_open() ) {
+        // read number of observed variables
         size_t nr_evid;
         is >> nr_evid;
         if( is.fail() )
             DAI_THROWE(INVALID_EVIDENCE_FILE,"Cannot read number of observed variables");
 
+        // for each observation, read the variable label and the observed value
         for( size_t i = 0; i < nr_evid; i++ ) {
             size_t label, val;
             is >> label;
+            if( is.fail() )
+                DAI_THROWE(INVALID_EVIDENCE_FILE,"Cannot read label for " + toString(i) + "'th observed variable");
             is >> val;
+            if( is.fail() )
+                DAI_THROWE(INVALID_EVIDENCE_FILE,"Cannot read value of " + toString(i) + "'th observed variable");
             evid[label] = val;
         }
 
+        // close file
         is.close();
     } else
         DAI_THROWE(CANNOT_READ_FILE,"Cannot read from file " + std::string(filename));
@@ -46,196 +55,230 @@ map<size_t, size_t> ReadUAIEvidenceFile( char *filename ) {
 }
 
 
-pair<vector<Var>, vector<Factor> > ReadUAIFGFile( const char *filename, long verbose ) {
-    vector<Factor> factors;
-    vector<Var> vars;
+/// Reads factor graph (as a pair of a variable vector and factor vector) from a UAI factor graph file
+pair<vector<Var>, vector<Factor> > ReadUAIFGFile( const char *filename, size_t verbose ) {
+    pair<vector<Var>, vector<Factor> > result;
+    vector<Var>& vars = result.first;
+    vector<Factor>& factors = result.second;
 
+    // open file
     ifstream is;
     is.open( filename );
     if( is.is_open() ) {
-        try { // FIXME: no exceptions are generated by iostreams by default
-            size_t nr_f, nr_v;
-            string line;
-            
-            getline(is,line);
-            if( line != "BAYES" && line != "MARKOV" && line != "BAYES\r" && line != "MARKOV\r" )
-                DAI_THROW(INVALID_FACTORGRAPH_FILE);
-            if( verbose >= 2 )
-                cout << "Reading " << line << " network..." << endl;
-
-            is >> nr_v;
+        size_t nrFacs, nrVars;
+        string line;
+        
+        // read header line
+        getline(is,line);
+        if( is.fail() || (line != "BAYES" && line != "MARKOV" && line != "BAYES\r" && line != "MARKOV\r") )
+            DAI_THROWE(INVALID_FACTORGRAPH_FILE,"UAI factor graph file should start with \"BAYES\" or \"MARKOV\"");
+        if( verbose >= 2 )
+            cout << "Reading " << line << " network..." << endl;
+
+        // read number of variables
+        is >> nrVars;
+        if( is.fail() )
+            DAI_THROWE(INVALID_FACTORGRAPH_FILE,"Cannot read number of variables");
+        if( verbose >= 2 )
+            cout << "Reading " << nrVars << " variables..." << endl;
+
+        // for each variable, read its number of states
+        vars.reserve( nrVars );
+        for( size_t i = 0; i < nrVars; i++ ) {
+            size_t dim;
+            is >> dim;
             if( is.fail() )
-                DAI_THROW(INVALID_FACTORGRAPH_FILE);
-            if( verbose >= 2 )
-                cout << "Reading " << nr_v << " variables..." << endl;
+                DAI_THROWE(INVALID_FACTORGRAPH_FILE,"Cannot read number of states of " + toString(i) + "'th variable");
+            vars.push_back( Var( i, dim ) );
+        }
 
-            for( size_t i = 0; i < nr_v; i++ ) {
-                size_t dim;
-                is >> dim;
-                if( is.fail() )
-                    DAI_THROW(INVALID_FACTORGRAPH_FILE);
-                vars.push_back( Var( i, dim ) );
-            }
+        // read number of factors
+        is >> nrFacs;
+        if( is.fail() )
+            DAI_THROWE(INVALID_FACTORGRAPH_FILE,"Cannot read number of factors");
+        if( verbose >= 2 )
+            cout << "Reading " << nrFacs << " factors..." << endl;
+
+        // for each factor, read the variables on which it depends
+        vector<vector<long> > labels;
+        factors.reserve( nrFacs );
+        labels.reserve( nrFacs );
+        for( size_t I = 0; I < nrFacs; I++ ) {
+            if( verbose >= 3 )
+                cout << "Reading factor " << I << "..." << endl;
 
-            is >> nr_f;
+            // read number of variables for factor I
+            size_t I_nrVars;
+            is >> I_nrVars;
             if( is.fail() )
-                DAI_THROW(INVALID_FACTORGRAPH_FILE);
-            if( verbose >= 2 )
-                cout << "Reading " << nr_f << " factors..." << endl;
-
-            vector<vector<long> > labels;
-            for( size_t I = 0; I < nr_f; I++ ) {
-                if( verbose >= 3 )
-                    cout << "Reading factor " << I << "..." << endl;
-                size_t I_nrvars;
-                is >> I_nrvars;
-                if( verbose >= 3 )
-                    cout << "  I_nrvars: " << I_nrvars << endl;
-
-                vector<long> I_labels;
-                vector<size_t> I_dims;
-                VarSet I_vars;
-                for( size_t _i = 0; _i < I_nrvars; _i++ ) {
-                    long label;
-                    is >> label;
-                    I_labels.push_back(label);
-                    I_dims.push_back(vars[label].states());
-                    I_vars |= vars[label];
-                }
-                if( verbose >= 3 )
-                    cout << "  labels: " << I_labels << ", dimensions " << I_dims << endl;
-
-                // add the Factor and the labels
-                factors.push_back(Factor(I_vars,0.0));
-                labels.push_back(I_labels);
-                
+                DAI_THROWE(INVALID_FACTORGRAPH_FILE,"Cannot read number of variables for " + toString(I) + "'th factor");
+            if( verbose >= 3 )
+                cout << "  which depends on " << I_nrVars << " variables" << endl;
+
+            // for each of the variables, read its label and number of states
+            vector<long> I_labels;
+            vector<size_t> I_dims;
+            VarSet I_vars;
+            I_labels.reserve( I_nrVars );
+            I_dims.reserve( I_nrVars );
+            for( size_t _i = 0; _i < I_nrVars; _i++ ) {
+                long label;
+                is >> label;
+                if( is.fail() )
+                    DAI_THROWE(INVALID_FACTORGRAPH_FILE,"Cannot read variable labels for " + toString(I) + "'th factor");
+                I_labels.push_back( label );
+                I_dims.push_back( vars[label].states() );
+                I_vars |= vars[label];
             }
+            if( verbose >= 3 )
+                cout << "  labels: " << I_labels << ", dimensions " << I_dims << endl;
 
-            for( size_t I = 0; I < nr_f; I++ ) {
-                if( verbose >= 3 )
-                    cout << "Reading factor " << I << "..." << endl;
-
-                reverse(labels[I].begin(), labels[I].end());    // last label is least significant
-
-                size_t I_nrvars = factors[I].vars().size();
-                vector<size_t> I_dims;
-                for( size_t _i = 0; _i < I_nrvars; _i++ )
-                    I_dims.push_back(vars[labels[I][_i]].states());
-                if( verbose >= 3 )
-                    cout << "  labels: " << labels[I] << ", dimensions " << I_dims << endl;
-
-                // calculate permutation sigma (internally, members are sorted)
-                vector<size_t> sigma(I_nrvars,0);
-                VarSet::const_iterator j = factors[I].vars().begin();
-                for( size_t mi = 0; mi < I_nrvars; mi++,j++ ) {
-                    long search_for = j->label();
-                    vector<long>::iterator j_loc = find(labels[I].begin(),labels[I].end(),search_for);
-                    sigma[mi] = j_loc - labels[I].begin();
-                }
-                if( verbose >= 3 )
-                    cout << "  sigma: " << sigma << endl;
-
-                // calculate multindices
-                Permute permindex( I_dims, sigma );
-                
-                // read values
-                size_t nr_nonzeros;
-                is >> nr_nonzeros;
-                if( verbose >= 3 ) 
-                    cout << "  nonzeroes: " << nr_nonzeros << endl;
-                assert( nr_nonzeros == factors[I].states() );
-                for( size_t li = 0; li < nr_nonzeros; li++ ) {
-                    double val;
-                    is >> val;
-                    factors[I][permindex.convertLinearIndex( li )] = val;
-                }
-            }
+            // add the factor and the labels
+            factors.push_back( Factor(I_vars,0.0) );
+            labels.push_back( I_labels );
+        }
 
+        // for each factor, read its values
+        for( size_t I = 0; I < nrFacs; I++ ) {
             if( verbose >= 3 )
-                cout << "factors:" << factors << endl;
+                cout << "Reading factor " << I << "..." << endl;
 
+            // last label is least significant, so we reverse the label vector
+            reverse( labels[I].begin(), labels[I].end() );
 
-            is.close();
-        } catch (...) {
-            is.close();
-            throw;
+            // prepare a vector containing the dimensionalities of the variables for this factor
+            size_t I_nrVars = factors[I].vars().size();
+            vector<size_t> I_dims;
+            I_dims.reserve( I_nrVars );
+            for( size_t _i = 0; _i < I_nrVars; _i++ )
+                I_dims.push_back( vars[labels[I][_i]].states() );
+            if( verbose >= 3 )
+                cout << "  labels: " << labels[I] << ", dimensions " << I_dims << endl;
+
+            // calculate permutation sigma (internally, members are sorted canonically, 
+            // which may be different from the way they are sorted in the file)
+            vector<size_t> sigma( I_nrVars, 0 );
+            VarSet::const_iterator j = factors[I].vars().begin();
+            for( size_t mi = 0; mi < I_nrVars; mi++, j++ )
+                sigma[mi] = distance( labels[I].begin(), find( labels[I].begin(), labels[I].end(), j->label() ) );
+            if( verbose >= 3 )
+                cout << "  permutation: " << sigma << endl;
+
+            // construct permutation object
+            Permute permindex( I_dims, sigma );
+            
+            // read factor values
+            size_t nrNonZeros;
+            is >> nrNonZeros;
+            if( is.fail() )
+                DAI_THROWE(INVALID_FACTORGRAPH_FILE,"Cannot read number of nonzero factor values for " + toString(I) + "'th factor");
+            if( verbose >= 3 ) 
+                cout << "  number of nonzero values: " << nrNonZeros << endl;
+            DAI_ASSERT( nrNonZeros == factors[I].states() );
+            for( size_t li = 0; li < nrNonZeros; li++ ) {
+                Real val;
+                is >> val;
+                if( is.fail() )
+                    DAI_THROWE(INVALID_FACTORGRAPH_FILE,"Cannot read factor values of " + toString(I) + "'th factor");
+                // assign value after calculating its linear index corresponding to the permutation
+                factors[I][permindex.convertLinearIndex( li )] = val;
+            }
         }
+        if( verbose >= 3 )
+            cout << "factors:" << factors << endl;
+
+        // close file
+        is.close();
     } else
-        DAI_THROW(CANNOT_READ_FILE);
+        DAI_THROWE(CANNOT_READ_FILE,"Cannot read from file " + std::string(filename));
 
-    return pair<vector<Var>, vector<Factor> >(vars,factors);
+    return result;
 }
 
 
 int main( int argc, char *argv[] ) {
-    if ( argc != 6 ) {
-        cout << "Usage: " << argv[0] << " <filename.uai> <filename.uai.evid> <filename.fg> <type> <run_jtree>" << endl << endl;
-        cout << "Converts input files from UAI approximate inference evaluation to" << endl;
-        cout << "libDAI factor graph format." << endl;
+    if ( argc != 7 ) {
+        cout << "This program is part of libDAI - http://www.libdai.org/" << endl << endl;
+        cout << "Usage: ./uai2fg <filename.uai> <filename.uai.evid> <filename.fg> <type> <run_jtree> <verbose>" << endl << endl;
+        cout << "Converts input files in the UAI 2008 approximate inference evaluation format" << endl;
+        cout << "(see http://graphmod.ics.uci.edu/uai08/) to the libDAI factor graph format." << endl;
         cout << "Reads factor graph <filename.uai> and evidence <filename.uai.evid>" << endl;
-        cout << "and writes the clamped factor graph to <filename.fg>." << endl;
-        cout << "If type==0, uses surgery, otherwise, uses safe method." << endl;
+        cout << "and writes the resulting clamped factor graph to <filename.fg>." << endl;
+        cout << "If type==0, uses surgery (recommended), otherwise, uses just adds delta factors." << endl;
         cout << "If run_jtree!=0, runs a junction tree and reports the results in the UAI 2008 results file format." << endl;
         return 1;
     } else {
-        long verbose = 0;
+        long verbose = atoi( argv[6] );
         long type = atoi( argv[4] );
         bool run_jtree = atoi( argv[5] );
 
-        pair<vector<Var>, vector<Factor> > input;
-        input = ReadUAIFGFile( argv[1], verbose );
-        FactorGraph fg0( input.second );
+        // read factor graph and evidence
+        pair<vector<Var>, vector<Factor> > varfacs = ReadUAIFGFile( argv[1], verbose );
         map<size_t,size_t> evid = ReadUAIEvidenceFile( argv[2] );
-        double logZcorr = 0.0;
+        vector<Var>& vars = varfacs.first;
+        vector<Factor>& facs = varfacs.second;
+
+        // construct unclamped factor graph
+        FactorGraph fg0( facs.begin(), facs.end(), vars.begin(), vars.end(), facs.size(), vars.size() );
+
+        // change factor graph to reflect observed evidence
         if( type == 0 ) {
-            for( size_t I = 0; I < input.second.size(); I++ ) {
+            // replace factors with clamped variables with slices
+            for( size_t I = 0; I < facs.size(); I++ ) {
                 for( map<size_t,size_t>::const_iterator e = evid.begin(); e != evid.end(); e++ ) {
-                    if( input.second[I].vars() >> input.first[e->first] ) {
+                    if( facs[I].vars() >> vars[e->first] ) {
                         if( verbose >= 2 )
-                            cout << "clamping " << e->first << " to value " << e->second << " in factor " << I << " = " << input.second[I].vars() << endl;
-                        input.second[I] = input.second[I].slice( input.first[e->first], e->second );
+                            cout << "Clamping " << e->first << " to value " << e->second << " in factor " << I << " = " << facs[I].vars() << endl;
+                        facs[I] = facs[I].slice( vars[e->first], e->second );
                         if( verbose >= 2 )
-                            cout << "...remaining vars: " << input.second[I].vars() << endl;
+                            cout << "...remaining vars: " << facs[I].vars() << endl;
                     }
                 }
             }
             // remove empty factors
-            for( vector<Factor>::iterator I = input.second.begin(); I != input.second.end(); )
+            double logZcorr = 0.0;
+            for( vector<Factor>::iterator I = facs.begin(); I != facs.end(); )
                 if( I->vars().size() == 0 ) {
                     logZcorr += std::log( (Real)(*I)[0] );
-                    I = input.second.erase( I );
+                    I = facs.erase( I );
                 } else
                     I++;
             // multiply with logZcorr constant
-            if( input.second.size() == 0 )
-                input.second.push_back( Factor( VarSet(), std::exp(logZcorr) ) );
+            if( facs.size() == 0 )
+                facs.push_back( Factor( VarSet(), std::exp(logZcorr) ) );
             else
-                input.second.front() *= std::exp(logZcorr);
+                facs.front() *= std::exp(logZcorr);
         }
-        for( map<size_t,size_t>::const_iterator e = evid.begin(); e != evid.end(); e++ ) {
-            // add delta factors
-            input.second.push_back( Factor( input.first[e->first], 0.0 ) );
-            input.second.back()[e->second] = 1.0;
-        }
-        FactorGraph fg(input.second);
+        // add delta factors corresponding to observed variable values
+        for( map<size_t,size_t>::const_iterator e = evid.begin(); e != evid.end(); e++ )
+            facs.push_back( createFactorDelta( vars[e->first], e->second ) );
+
+        // construct clamped factor graph
+        FactorGraph fg( facs.begin(), facs.end(), vars.begin(), vars.end(), facs.size(), vars.size() );
 
+        // write it to a file
         fg.WriteToFile( argv[3] );
 
+        // if requested, perform various inference tasks
         if( run_jtree ) {
+            // construct junction tree on unclamped factor graph
             JTree jt0( fg0, PropertySet()("updates",string("HUGIN")) );
             jt0.init();
             jt0.run();
 
+            // construct junction tree on clamped factor graph
             JTree jt( fg, PropertySet()("updates",string("HUGIN")) );
             jt.init();
             jt.run();
 
+            // output probability of evidence
             cout.precision( 8 );
             if( evid.size() )
                 cout << "z " << (jt.logZ() - jt0.logZ()) / dai::log((Real)10.0) << endl;
             else
                 cout << "z " << jt.logZ() / dai::log((Real)10.0) << endl;
 
+            // output variable marginals
             cout << "m " << jt.nrVars() << " ";
             for( size_t i = 0; i < jt.nrVars(); i++ ) {
                 cout << jt.var(i).states() << " ";
@@ -244,7 +287,7 @@ int main( int argc, char *argv[] ) {
             }
             cout << endl;
 
-            cout << "s ";
+            // calculate MAP state
             jt.props.inference = JTree::Properties::InfType::MAXPROD;
             jt.init();
             jt.run();
@@ -255,11 +298,13 @@ int main( int argc, char *argv[] ) {
             double log_MAP_prob = 0.0;
             for( size_t I = 0; I < jt.nrFactors(); I++ )
                 log_MAP_prob += dai::log( jt.factor(I)[calcLinearState( jt.factor(I).vars(), state )] );
+
+            // output MAP state
+            cout << "s ";
             if( evid.size() )
                 cout << (log_MAP_prob - jt0.logZ()) / dai::log((Real)10.0) << " ";
             else
                 cout << log_MAP_prob / dai::log((Real)10.0) << " ";
-
             cout << jt.nrVars() << " ";
             for( size_t i = 0; i < jt.nrVars(); i++ )
                 cout << MAP[i] << " ";