-git b6efbfd9e4e76de65c44e4060a9b56f99543b70a (head of branch eaton before merge)
---------------------------------------------------------------------------------
+* Added work-around for bug in Boost Graph Library
+* Improvements of TFactor<T>:
+ - Extended functionality of TFactor<T>::operators +,-,+=,-= to handle different VarSets
+ - Added TFactor<T>::maxMarginal (similar to marginal but with max instead of sum)
+* Added BipartiteGraph::eraseEdge
+* Removed all the virtual default constructors *::create(), as they are not used.
+* Fixed bug in MaxSpanningTree (it wrongly assumed that the graph was not empty).
+* [Charlie Vaske] Added Expectation Maximization code.
+* Added MatLab QuickStart to README.
+* MEX file dai now also returns variable and factor beliefs.
* Cleanups and extra documentation of the code contributed by Frederik Eaton
* Removed erroneous 'inline' directives in gibbs.cpp
* [Frederik Eaton] Added script for automatically generating properties code (used by CBP and BBP)
errors (thanks to Dan Preston for reporting this)
* toc() now returns POSIX system time with maximum accuracy of microseconds
* Exception objects now remember their error code
-* Added examples/example_springler.cpp
+* Added examples/example_sprinkler.cpp
git 065eae35cbfcc36f1a945ae3053c80c23f366306
LIB=lib
# Define build targets
-TARGETS=tests utils lib examples testregression
+TARGETS=tests utils lib examples testregression testem
ifdef WITH_DOC
TARGETS:=$(TARGETS) doc
endif
endif
# Define conditional build targets
-OBJECTS:=exactinf$(OE)
+OBJECTS:=exactinf$(OE) evidence$(OE) emalg$(OE)
ifdef WITH_BP
CCFLAGS:=$(CCFLAGS) -DDAI_WITH_BP
OBJECTS:=$(OBJECTS) bp$(OE)
matlabs : matlab/dai$(ME) matlab/dai_readfg$(ME) matlab/dai_writefg$(ME) matlab/dai_potstrength$(ME)
-tests : tests/testdai$(EE) tests/testbbp$(EE)
+tests : tests/testdai$(EE) tests/testem/testem$(EE) tests/testbbp$(EE)
utils : utils/createfg$(EE) utils/fg2dot$(EE) utils/fginfo$(EE)
gibbs$(OE) : $(SRC)/gibbs.cpp $(INC)/gibbs.h $(HEADERS)
$(CC) -c $(SRC)/gibbs.cpp
+evidence$(OE) : $(SRC)/evidence.cpp $(INC)/evidence.h $(HEADERS)
+ $(CC) -c $(SRC)/evidence.cpp
+
+emalg$(OE) : $(SRC)/emalg.cpp $(INC)/emalg.h $(INC)/evidence.h $(HEADERS)
+ $(CC) -c $(SRC)/emalg.cpp
+
properties$(OE) : $(SRC)/properties.cpp $(HEADERS)
$(CC) -c $(SRC)/properties.cpp
tests/testdai$(EE) : tests/testdai.cpp $(HEADERS) $(LIB)/libdai$(LE)
$(CC) $(CCO)tests/testdai$(EE) tests/testdai.cpp $(LIBS) $(BOOSTLIBS)
+tests/testem/testem$(EE) : tests/testem/testem.cpp $(HEADERS) $(LIB)/libdai$(LE)
+ $(CC) $(CCO)$@ $< $(LIBS) $(BOOSTLIBS)
tests/testbbp$(EE) : tests/testbbp.cpp $(HEADERS) $(LIB)/libdai$(LE)
$(CC) $(CCO)tests/testbbp$(EE) tests/testbbp.cpp $(LIBS)
# REGRESSION TESTS
###################
-ifneq ($(OS),WINDOWS)
testregression : tests/testdai$(EE)
@echo Starting regression test...this can take a minute or so!
+ifneq ($(OS),WINDOWS)
cd tests && ./testregression && cd ..
else
-testregression : tests/testdai$(EE)
- @echo Starting regression test...this can take a minute or so!
cd tests && testregression.bat && cd ..
endif
+testem : tests/testem/testem$(EE)
+ @echo Starting EM tests
+ifneq ($(OS),WINDOWS)
+ cd tests/testem && ./runtests && cd ../..
+else
+ cd tests\testem && runtests && cd ..\..
+endif
+
# DOCUMENTATION
################
-rm *$(OE)
-rm matlab/*$(ME)
-rm examples/example$(EE) examples/example_bipgraph$(EE) examples/example_varset$(EE) examples/example_sprinkler$(EE)
- -rm tests/testdai$(EE) tests/testbbp$(EE)
+ -rm tests/testdai$(EE) tests/testem/testem$(EE) tests/testbbp$(EE)
-rm utils/fg2dot$(EE) utils/createfg$(EE) utils/fginfo$(EE)
-rm -R doc
-rm -R lib
else
.PHONY : clean
clean :
- -del *$(OE) *.ilk *.pdb *$(EE) matlab\*$(ME) examples\*$(EE) examples\*.ilk examples\*.pdb tests\testdai$(EE) tests\*.pdb tests\*.ilk utils\*$(EE) utils\*.pdb utils\*.ilk $(LIB)\libdai$(LE)
+ -del *$(OE) *.ilk *.pdb *$(EE) matlab\*$(ME) examples\*$(EE) examples\*.ilk examples\*.pdb tests\testdai$(EE) tests\testem\testem$(EE) tests\*.pdb tests\*.ilk utils\*$(EE) utils\*.pdb utils\*.ilk $(LIB)\libdai$(LE)
endif
DEBUG=true
# Build matlab interface?
WITH_MATLAB=
-# New/old matlab version?
+# MatLab version 7.3 (R2006b) or newer?
NEW_MATLAB=true
# OPERATING SYSTEM
DEBUG=true
# Build matlab interface?
WITH_MATLAB=
-# New/old matlab version?
+# MatLab version 7.3 (R2006b) or newer?
NEW_MATLAB=true
# OPERATING SYSTEM
DEBUG=true
# Build matlab interface?
WITH_MATLAB=
-# New/old matlab version?
+# MatLab version 7.3 (R2006b) or newer?
NEW_MATLAB=true
# OPERATING SYSTEM
DEBUG=true
# Build matlab interface?
WITH_MATLAB=
-# New/old matlab version?
+# MatLab version 7.3 (R2006b) or newer?
NEW_MATLAB=true
# OPERATING SYSTEM
Martijn Leisink
Giuseppe Passino
Frederik Eaton
+Charlie Vaske
Bastian Wemmenhove
Christian Wojek
Claudio Lima
Acknowledgements section. The appropriate citation is:
J. M. Mooij (2008) "libDAI 0.2.2: A free/open source C++ library for Discrete
-Approximate Inference methods", http://mloss.org/software/view/77/.
+Approximate Inference methods", http://www.libdai.org
Moreover, as a personal note, I would appreciate it if you would email me with
citations of papers referencing this work so I can mention them to my funding
* Gibbs sampler;
* Conditioned BP [EaG09].
+In addition, libDAI supports parameter learning of conditional probability
+tables by Expectation Maximization.
+
Why C++?
--------
Because libDAI is implemented in C++, it is very fast compared with
implementations in MatLab (a factor 1000 faster is not uncommon). libDAI does
-provide a MatLab interface for easy integration with MatLab.
+provide a (limited) MatLab interface for easy integration with MatLab.
Releases
--------
-Releases can be obtained from http://mloss.org/software/view/77/
+Releases can be obtained from www.libdai.org
License: GNU Public License v2 (or higher).
libDAI-0.2 December 1, 2006
To build the libDAI source, first copy a template Makefile.* to Makefile.conf
(for example, copy Makefile.LINUX to Makefile.conf if you use GNU/Linux).
Then, edit the Makefile.conf template to adapt it to your local setup.
-Especially directories may change from system to system. Finally, run
+Especially directories may differ from system to system. Finally, run
make
or the more elaborate test program:
tests\testdai --aliases tests\aliases.conf --filename tests\alarm.fg --methods JTREE_HUGIN BP_SEQMAX
+
+
+Quick start (MatLab)
+--------------------
+You need:
+- MatLab
+- The platform-dependent requirements described above
+
+First, you need to build the libDAI source as described above for your
+platform. By default, the MatLab interface is disabled, so before compiling the
+source, you have to enable it in the Makefile.conf by setting
+"WITH_MATLAB=true". Also, you have to configure the MatLab-specific parts of
+Makefile.conf to match your system (in particular, the Makefile variables ME,
+MATLABDIR and MEX). The MEX file extension depends on your platform; for a
+64-bit linux x86_64 system this would be "ME=.mexa64", for a 32-bit linux x86
+system this would be "ME=.mexglx". If you are unsure about your MEX file
+extension: it needs to be the same as what the MatLab command "mexext" returns.
+The required MEX files are built by issuing
+
+ make
+
+from the command line. The MatLab interface is much less powerful than using
+libDAI from C++. There are two reasons for this: (i) it is tedious to write MEX
+files; (ii) a large performance penalty is paid when large data structures (like
+factor graphs) have to be converted from their native C++ representation into
+something that MatLab understands.
+
+A simple example of how to use the MatLab interface is the following (entered
+at the MatLab prompt), which performs exact inference by the junction tree
+algorithm and approximate inference by belief propagation on the ALARM network:
+
+ cd path_to_libdai/matlab
+ [psi] = dai_readfg ('../examples/alarm.fg');
+ [logZ,q,md,qv,qf] = dai (psi, 'JTREE', '[updates=HUGIN,verbose=0]')
+ [logZ,q,md,qv,qf] = dai (psi, 'BP', '[updates=SEQMAX,tol=1e-9,maxiter=10000,logdomain=0]')
+
+where "path_to_libdai" has to be replaced with the directory in which libDAI
+was installed. For other algorithms and their parameters, see
+tests/aliases.conf.
#include <dai/daialg.h>
#include <dai/properties.h>
#include <dai/exactinf.h>
+#include <dai/evidence.h>
+#include <dai/emalg.h>
#ifdef DAI_WITH_BP
#include <dai/bp.h>
#endif
/// Removes node n2 of type 2 and all incident edges.
void erase2( size_t n2 );
+ /// Removes edge between node n1 of type 1 and node n2 of type 2.
+ void eraseEdge( size_t n1, size_t n2 ) {
+ assert( n1 < nr1() );
+ assert( n2 < nr2() );
+ for( Neighbors::iterator i1 = _nb1[n1].begin(); i1 != _nb1[n1].end(); i1++ )
+ if( i1->node == n2 ) {
+ _nb1[n1].erase( i1 );
+ break;
+ }
+ for( Neighbors::iterator i2 = _nb2[n2].begin(); i2 != _nb2[n2].end(); i2++ )
+ if( i2->node == n1 ) {
+ _nb2[n2].erase( i2 );
+ break;
+ }
+ }
+
/// Adds an edge between node n1 of type 1 and node n2 of type 2.
/** If check == true, only adds the edge if it does not exist already.
*/
/// @name General InfAlg interface
//@{
virtual BP* clone() const { return new BP(*this); }
- virtual BP* create() const { return new BP(); }
virtual std::string identify() const;
virtual Factor belief( const Var &n ) const;
virtual Factor belief( const VarSet &ns ) const;
/// Returns a pointer to a new, cloned copy of *this (i.e., virtual copy constructor)
virtual InfAlg* clone() const = 0;
- /// Returns a pointer to a newly constructed object *this (i.e., virtual default constructor)
- virtual InfAlg* create() const { DAI_THROW(NOT_IMPLEMENTED); }
-
/// Identifies itself for logging purposes
virtual std::string identify() const = 0;
* - Gibbs sampler;
* - Conditioned BP [\ref EaG09];
*
+ * In addition, libDAI supports parameter learning of conditional probability
+ * tables by Expectation Maximization.
+ *
* \section language Why C++?
* Because libDAI is implemented in C++, it is very fast compared with
* implementations in MatLab (a factor 1000 faster is not uncommon).
*/
-/** \page biboliography Bibliography
+/** \page bibliography Bibliography
* \section Bibliograpy
* \anchor KFL01 \ref KFL01
* F. R. Kschischang and B. J. Frey and H.-A. Loeliger (2001):
--- /dev/null
+/* Copyright (C) 2009 Charles Vaske [cvaske at soe dot ucsc dot edu]
+ University of California Santa Cruz
+
+ This file is part of libDAI.
+
+ libDAI is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ libDAI is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with libDAI; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+
+#ifndef __defined_libdai_emalg_h
+#define __defined_libdai_emalg_h
+
+
+#include <vector>
+#include <map>
+
+#include <dai/factor.h>
+#include <dai/daialg.h>
+#include <dai/evidence.h>
+#include <dai/index.h>
+#include <dai/properties.h>
+
+
+/// \file
+/// \brief Defines classes related to Expectation Maximization: EMAlg, ParameterEstimation, CondProbEstimation, SharedParameters and MaximizationStep
+/// \todo Describe EM file format
+
+
+namespace dai {
+
+
+/// Interface for a parameter estimation method.
+/** This parameter estimation interface is based on sufficient statistics.
+ * Implementations are responsible for collecting data from a probability
+ * vector passed to it from a SharedParameters container object.
+ *
+ * Implementations of this interface should register a factory function
+ * via the static ParameterEstimation::registerMethod function.
+ * The default registry only contains CondProbEstimation, named
+ * "ConditionalProbEstimation".
+ */
+class ParameterEstimation {
+ public:
+ /// A pointer to a factory function.
+ typedef ParameterEstimation* (*ParamEstFactory)( const PropertySet& );
+
+ /// Virtual destructor for deleting pointers to derived classes.
+ virtual ~ParameterEstimation() {}
+ /// Virtual copy constructor.
+ virtual ParameterEstimation* clone() const = 0;
+
+ /// General factory method for construction of ParameterEstimation subclasses.
+ static ParameterEstimation* construct( const std::string &method, const PropertySet &p );
+
+ /// Register a subclass so that it can be used with ParameterEstimation::construct.
+ static void registerMethod( const std::string &method, const ParamEstFactory &f ) {
+ if( _registry == NULL )
+ loadDefaultRegistry();
+ (*_registry)[method] = f;
+ }
+
+ /// Estimate the factor using the accumulated sufficient statistics and reset.
+ virtual Prob estimate() = 0;
+
+ /// Accumulate the sufficient statistics for p.
+ virtual void addSufficientStatistics( const Prob &p ) = 0;
+
+ /// Returns the size of the Prob that should be passed to addSufficientStatistics.
+ virtual size_t probSize() const = 0;
+
+ private:
+ /// A static registry containing all methods registered so far.
+ static std::map<std::string, ParamEstFactory> *_registry;
+
+ /// Registers default ParameterEstimation subclasses (currently, only CondProbEstimation).
+ static void loadDefaultRegistry();
+};
+
+
+/// Estimates the parameters of a conditional probability table, using pseudocounts.
+class CondProbEstimation : private ParameterEstimation {
+ private:
+ /// Number of states of the variable of interest
+ size_t _target_dim;
+ /// Current pseudocounts
+ Prob _stats;
+ /// Initial pseudocounts
+ Prob _initial_stats;
+
+ public:
+ /// Constructor
+ /** For a conditional probability \f$ P( X | Y ) \f$,
+ * \param target_dimension should equal \f$ | X | \f$
+ * \param pseudocounts has length \f$ |X| \cdot |Y| \f$
+ */
+ CondProbEstimation( size_t target_dimension, const Prob &pseudocounts );
+
+ /// Virtual constructor, using a PropertySet.
+ /** Some keys in the PropertySet are required.
+ * For a conditional probability \f$ P( X | Y ) \f$,
+ * - target_dim should be equal to \f$ | X | \f$
+ * - total_dim should be equal to \f$ |X| \cdot |Y| \f$
+ *
+ * An optional key is:
+ * - pseudo_count, which specifies the initial counts (defaults to 1)
+ */
+ static ParameterEstimation* factory( const PropertySet &p );
+
+ /// Virtual copy constructor
+ virtual ParameterEstimation* clone() const { return new CondProbEstimation( _target_dim, _initial_stats ); }
+
+ /// Virtual destructor
+ virtual ~CondProbEstimation() {}
+
+ /// Returns an estimate of the conditional probability distribution.
+ /** The format of the resulting Prob keeps all the values for
+ * \f$ P(X | Y=y) \f$ in sequential order in the array.
+ */
+ virtual Prob estimate();
+
+ /// Accumulate sufficient statistics from the expectations in p.
+ virtual void addSufficientStatistics( const Prob &p );
+
+ /// Returns the required size for arguments to addSufficientStatistics.
+ virtual size_t probSize() const { return _stats.size(); }
+};
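+
+/* A minimal sketch (illustration only, not part of the library) of constructing
+ * an estimator for a conditional probability table P(X|Y) with |X| = 2 and
+ * |Y| = 3 through the registry; the PropertySet keys follow
+ * CondProbEstimation::factory, and PropertySet comes from dai/properties.h.
+ * A custom subclass would first have to be registered with
+ * ParameterEstimation::registerMethod.
+ *
+ *     PropertySet props;
+ *     props.Set( "target_dim", std::string("2") );
+ *     props.Set( "total_dim", std::string("6") );
+ *     ParameterEstimation *est =
+ *         ParameterEstimation::construct( "ConditionalProbEstimation", props );
+ */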
+
+
+/// A single factor or set of factors whose parameters should be estimated.
+/** To ensure that parameters can be shared between different factors during
+ * EM learning, each factor's values are reordered to match a desired variable
+ * ordering. The ordering of the variables in a factor may therefore differ
+ * from the canonical ordering used in libDAI. The SharedParameters
+ * class couples one or more factors (together with the specified orderings
+ * of the variables) with a ParameterEstimation object, taking care of the
+ * necessary permutations of the factor entries / parameters.
+ */
+class SharedParameters {
+ public:
+ /// Convenience label for an index into a factor in a FactorGraph.
+ typedef size_t FactorIndex;
+ /// Convenience label for a grouping of factor orientations.
+ typedef std::map<FactorIndex, std::vector<Var> > FactorOrientations;
+
+ private:
+ /// Maps factor indices to the corresponding VarSets
+ std::map<FactorIndex, VarSet> _varsets;
+ /// Maps factor indices to the corresponding Permute objects that permute the desired ordering into the canonical ordering
+ std::map<FactorIndex, Permute> _perms;
+ /// Maps factor indices to the corresponding desired variable orderings
+ FactorOrientations _varorders;
+ /// Parameter estimation method to be used
+ ParameterEstimation *_estimation;
+ /// Indicates whether the object pointed to by _estimation should be deleted upon destruction
+ bool _deleteEstimation;
+
+ /// Calculates the permutation that permutes the variables in varorder into the canonical ordering
+ static Permute calculatePermutation( const std::vector<Var> &varorder, VarSet &outVS );
+
+ /// Initializes _varsets and _perms from _varorders
+ void setPermsAndVarSetsFromVarOrders();
+
+ public:
+ /// Copy constructor
+ SharedParameters( const SharedParameters &sp );
+
+ /// Constructor
+ /** \param varorders all the factor orientations for this parameter
+ * \param estimation a pointer to the parameter estimation method
+ */
+ SharedParameters( const FactorOrientations &varorders, ParameterEstimation *estimation );
+
+ /// Constructor for making an object from a stream and a factor graph
+ SharedParameters( std::istream &is, const FactorGraph &fg_varlookup );
+
+ /// Destructor
+ ~SharedParameters() {
+ if( _deleteEstimation )
+ delete _estimation;
+ }
+
+ /// Collect the necessary statistics from expected values
+ void collectSufficientStatistics( InfAlg &alg );
+
+ /// Estimate and set the shared parameters
+ void setParameters( FactorGraph &fg );
+};
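+
+/* A minimal sketch (illustration only): tying the parameters of two
+ * single-variable factors so that they are estimated as one probability table.
+ * The factor indices 0 and 4 are hypothetical, both factors are assumed to be
+ * defined over the same binary variable with label 0, and est is an estimator
+ * constructed as in the sketch above (with target_dim == total_dim == 2).
+ * Note that this constructor does not take ownership of est.
+ *
+ *     Var x0( 0, 2 );
+ *     SharedParameters::FactorOrientations fo;
+ *     fo[0] = std::vector<Var>( 1, x0 );
+ *     fo[4] = std::vector<Var>( 1, x0 );
+ *     SharedParameters shared( fo, est );
+ */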
+
+
+/// A MaximizationStep groups together several parameter estimation tasks into a single unit.
+class MaximizationStep {
+ private:
+ std::vector<SharedParameters> _params;
+
+ public:
+ /// Default constructor
+ MaximizationStep() : _params() {}
+
+ /// Constructor from a vector of SharedParameters objects
+ MaximizationStep( std::vector<SharedParameters> &maximizations ) : _params(maximizations) {}
+
+ /// Constructor from an input stream and a corresponding factor graph
+ MaximizationStep( std::istream &is, const FactorGraph &fg_varlookup );
+
+ /// Collect the beliefs from this InfAlg as expectations for the next Maximization step.
+ void addExpectations( InfAlg &alg );
+
+ /// Using all of the currently added expectations, make new factors with maximized parameters and set them in the FactorGraph.
+ void maximize( FactorGraph &fg );
+};
+
+
+/// EMAlg performs Expectation Maximization to learn factor parameters.
+/** This requires specifying:
+ * - Evidence (instances of observations from the graphical model),
+ * - InfAlg for performing the E-step, which includes the factor graph,
+ * - a vector of MaximizationSteps steps to be performed.
+ *
+ * This implementation can perform incremental EM by using multiple
+ * MaximizationSteps. An expectation step is performed between execution
+ * of each MaximizationStep. A call to iterate() will cycle through all
+ * MaximizationSteps.
+ *
+ * Having multiple and separate maximization steps allows for maximizing some
+ * parameters, performing another E step, and then maximizing separate
+ * parameters, which may result in faster convergence in some cases.
+ */
+class EMAlg {
+ private:
+ /// All the data samples used during learning
+ const Evidence &_evidence;
+
+ /// How to do the expectation step
+ InfAlg &_estep;
+
+ /// The maximization steps to take
+ std::vector<MaximizationStep> _msteps;
+
+ /// Number of iterations done
+ size_t _iters;
+
+ /// History of likelihoods
+ std::vector<Real> _lastLogZ;
+
+ /// Maximum number of iterations
+ size_t _max_iters;
+
+ /// Convergence tolerance
+ Real _log_z_tol;
+
+ public:
+ /// Key for setting maximum iterations @see setTermConditions
+ static const std::string MAX_ITERS_KEY;
+ /// Default maximum iterations @see setTermConditions
+ static const size_t MAX_ITERS_DEFAULT;
+ /// Key for setting likelihood termination condition @see setTermConditions
+ static const std::string LOG_Z_TOL_KEY;
+ /// Default likelihood tolerance @see setTermConditions
+ static const Real LOG_Z_TOL_DEFAULT;
+
+ /// Construct an EMAlg from all these objects
+ EMAlg( const Evidence &evidence, InfAlg &estep, std::vector<MaximizationStep> &msteps, const PropertySet &termconditions )
+ : _evidence(evidence), _estep(estep), _msteps(msteps), _iters(0), _lastLogZ(), _max_iters(MAX_ITERS_DEFAULT), _log_z_tol(LOG_Z_TOL_DEFAULT)
+ {
+ setTermConditions( termconditions );
+ }
+
+ /// Construct an EMAlg from an Evidence object, an InfAlg object, and an input stream
+ EMAlg( const Evidence &evidence, InfAlg &estep, std::istream &mstep_file );
+
+ /// Change the conditions for termination
+ /** There are two possible parameters in the PropertySet
+ * - max_iters maximum number of iterations
+ * - log_z_tol proportion of increase in logZ
+ *
+ * \see hasSatisfiedTermConditions()
+ */
+ void setTermConditions( const PropertySet &p );
+
+ /// Determine if the termination conditions have been met.
+ /** There are two sufficient termination conditions:
+ * -# the maximum number of iterations has been performed
+ * -# the ratio of logZ increase over previous logZ is less than the
+ * tolerance, i.e.,
+ * \f$ \frac{\log(Z_t) - \log(Z_{t-1})}{| \log(Z_{t-1}) | } < \mathrm{tol} \f$.
+ */
+ bool hasSatisfiedTermConditions() const;
+
+ /// Returns number of iterations done so far
+ size_t getCurrentIters() const { return _iters; }
+
+ /// Perform an iteration over all maximization steps
+ Real iterate();
+
+ /// Perform an iteration over a single MaximizationStep
+ Real iterate( MaximizationStep &mstep );
+
+ /// Iterate until termination conditions are satisfied
+ void run();
+};
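+
+/* A minimal usage sketch (illustration only) of learning parameters with EM,
+ * using the junction tree algorithm for the E-step. The file names are
+ * hypothetical, newInfAlg is the inference algorithm factory declared in
+ * dai/alldai.h, and <fstream> is needed for std::ifstream.
+ *
+ *     FactorGraph fg;
+ *     fg.ReadFromFile( "model.fg" );
+ *
+ *     PropertySet infprops;
+ *     infprops.Set( "updates", std::string("HUGIN") );
+ *     infprops.Set( "verbose", std::string("0") );
+ *     InfAlg *inf = newInfAlg( "JTREE", fg, infprops );
+ *     inf->init();
+ *
+ *     Evidence e;
+ *     std::ifstream estream( "data.tab" );
+ *     e.addEvidenceTabFile( estream, fg );
+ *
+ *     std::ifstream emstream( "emsteps.em" );
+ *     EMAlg em( e, *inf, emstream );
+ *
+ *     while( !em.hasSatisfiedTermConditions() )
+ *         em.iterate();
+ *
+ *     delete inf;
+ */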
+
+
+} // end of namespace dai
+
+
+#endif
--- /dev/null
+/* Copyright (C) 2009 Charles Vaske [cvaske at soe dot ucsc dot edu]
+ University of California Santa Cruz
+
+ This file is part of libDAI.
+
+ libDAI is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ libDAI is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with libDAI; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+
+/// \file
+/// \brief Defines classes Evidence and Observation
+/// \todo Describe tabular data file format
+
+
+#ifndef __defined_libdai_evidence_h
+#define __defined_libdai_evidence_h
+
+
+#include <istream>
+#include <dai/daialg.h>
+
+
+namespace dai {
+
+
+/// Stores observed values of a subset of variables
+class Observation {
+ private:
+ /// Used to store the state of some variables
+ std::map<Var, size_t> _obs;
+
+ public:
+ /// Default constructor
+ Observation() : _obs() {}
+
+ /// Get all observations
+ const std::map<Var, size_t>& observations() const { return _obs; }
+
+ /// Add an observation
+ void addObservation( Var node, size_t setting );
+
+ /// Clamp variables in the graphical model to their observed values
+ void applyEvidence( InfAlg& alg ) const;
+};
+
+
+/// Stores multiple joint observations of sets of variables.
+/** The Evidence class stores multiple samples, where each sample is the joint
+ * observation of the states of some variables.
+ */
+class Evidence {
+ private:
+ /// Each sample is the joint observation of the states of some variables
+ std::vector<Observation> _samples;
+
+ public:
+ /// Default constructor
+ Evidence() : _samples() {}
+
+ /// Read in tabular data from a stream.
+ /** Each line contains one sample, and the first line is a header line with names.
+ */
+ void addEvidenceTabFile( std::istream& is, std::map<std::string, Var> &varMap );
+
+ /// Read in tabular data from a stream.
+ /** Each line contains one sample, and the first line is a header line with
+ * variable labels which should correspond with a subset of the variables in fg.
+ */
+ void addEvidenceTabFile( std::istream& is, FactorGraph& fg );
+
+ /// Returns number of stored samples
+ size_t nrSamples() const { return _samples.size(); }
+
+ /// @name Iterator interface
+ //@{
+ /// Iterator over the elements
+ typedef std::vector<Observation>::iterator iterator;
+ /// Constant iterator over the elements
+ typedef std::vector<Observation>::const_iterator const_iterator;
+
+ /// Returns iterator that points to the first element
+ iterator begin() { return _samples.begin(); }
+ /// Returns constant iterator that points to the first element
+ const_iterator begin() const { return _samples.begin(); }
+ /// Returns iterator that points beyond the last element
+ iterator end() { return _samples.end(); }
+ /// Returns constant iterator that points beyond the last element
+ const_iterator end() const { return _samples.end(); }
+ //@}
+};
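+
+/* Illustration of the tabular data format accepted by
+ * Evidence::addEvidenceTabFile, derived from the parser in evidence.cpp (not a
+ * normative description; see the \todo above). Suppose a factor graph has three
+ * variables with labels 0, 1 and 2, each with at least two states. Fields are
+ * tab-separated (<TAB> stands for a tab character), the header line lists the
+ * variable labels, each further line is one sample, and an empty field means
+ * that the variable was not observed in that sample:
+ *
+ *     0<TAB>1<TAB>2
+ *     0<TAB>1<TAB>1
+ *     1<TAB><TAB>0
+ */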
+
+
+} // end of namespace dai
+
+
+#endif
/// @name General InfAlg interface
//@{
virtual ExactInf* clone() const { return new ExactInf(*this); }
- virtual ExactInf* create() const { return new ExactInf(); }
virtual std::string identify() const;
virtual Factor belief( const Var &n ) const { return beliefV( findVar( n ) ); }
virtual Factor belief( const VarSet &ns ) const;
IMPOSSIBLE_TYPECAST,
INTERNAL_ERROR,
NOT_NORMALIZABLE,
+ INVALID_EVIDENCE_FILE,
+ INVALID_EVIDENCE_LINE,
+ INVALID_EVIDENCE_OBSERVATION,
+ INVALID_SHARED_PARAMETERS_ORDER,
+ INVALID_SHARED_PARAMETERS_INPUT_LINE,
+ UNKNOWN_PARAMETER_ESTIMATION_METHOD,
NUM_ERRORS}; // NUM_ERRORS should be the last entry
/// Constructor
#include <iostream>
+#include <functional>
#include <cmath>
#include <dai/prob.h>
#include <dai/varset.h>
namespace dai {
+/// Function object similar to std::divides(), but different in that dividing by zero results in zero
+template<typename T> struct divides0 : public std::binary_function<T, T, T> {
+ T operator()(const T& i, const T& j) const {
+ if( j == (T)0 )
+ return (T)0;
+ else
+ return i / j;
+ }
+};
+
+
/// Represents a (probability) factor.
/** Mathematically, a \e factor is a function mapping joint states of some
* variables to the nonnegative real numbers.
return *this;
}
+ /// Adds the TFactor f to *this
+ TFactor<T>& operator+= (const TFactor<T>& f) {
+ if( f._vs == _vs ) // optimize special case
+ _p += f._p;
+ else
+ *this = (*this + f);
+ return *this;
+ }
+
+ /// Subtracts the TFactor f from *this
+ TFactor<T>& operator-= (const TFactor<T>& f) {
+ if( f._vs == _vs ) // optimize special case
+ _p -= f._p;
+ else
+ *this = (*this - f);
+ return *this;
+ }
+
/// Returns product of *this with the TFactor f
/** The product of two factors is defined as follows: if
* \f$f : \prod_{l\in L} X_l \to [0,\infty)\f$ and \f$g : \prod_{m\in M} X_m \to [0,\infty)\f$, then
* \f[fg : \prod_{l\in L\cup M} X_l \to [0,\infty) : x \mapsto f(x_L) g(x_M).\f]
*/
- TFactor<T> operator* (const TFactor<T>& f) const;
+ TFactor<T> operator* (const TFactor<T>& f) const {
+ return pointwiseOp(*this,f,std::multiplies<T>());
+ }
/// Returns quotient of *this by the TFactor f
/** The quotient of two factors is defined as follows: if
* \f$f : \prod_{l\in L} X_l \to [0,\infty)\f$ and \f$g : \prod_{m\in M} X_m \to [0,\infty)\f$, then
* \f[\frac{f}{g} : \prod_{l\in L\cup M} X_l \to [0,\infty) : x \mapsto \frac{f(x_L)}{g(x_M)}.\f]
*/
- TFactor<T> operator/ (const TFactor<T>& f) const;
-
- /// Adds the TFactor f to *this
- /** \pre this->vars() == f.vars()
- */
- TFactor<T>& operator+= (const TFactor<T>& f) {
-#ifdef DAI_DEBUG
- assert( f._vs == _vs );
-#endif
- _p += f._p;
- return *this;
- }
-
- /// Subtracts the TFactor f from *this
- /** \pre this->vars() == f.vars()
- */
- TFactor<T>& operator-= (const TFactor<T>& f) {
-#ifdef DAI_DEBUG
- assert( f._vs == _vs );
-#endif
- _p -= f._p;
- return *this;
+ TFactor<T> operator/ (const TFactor<T>& f) const {
+ return pointwiseOp(*this,f,divides0<T>());
}
/// Returns sum of *this and the TFactor f
- /** \pre this->vars() == f.vars()
+ /** The sum of two factors is defined as follows: if
+ * \f$f : \prod_{l\in L} X_l \to [0,\infty)\f$ and \f$g : \prod_{m\in M} X_m \to [0,\infty)\f$, then
+ * \f[f+g : \prod_{l\in L\cup M} X_l \to [0,\infty) : x \mapsto f(x_L) + g(x_M).\f]
*/
TFactor<T> operator+ (const TFactor<T>& f) const {
-#ifdef DAI_DEBUG
- assert( f._vs == _vs );
-#endif
- TFactor<T> sum(*this);
- sum._p += f._p;
- return sum;
+ return pointwiseOp(*this,f,std::plus<T>());
}
/// Returns *this minus the TFactor f
- /** \pre this->vars() == f.vars()
+ /** The difference of two factors is defined as follows: if
+ * \f$f : \prod_{l\in L} X_l \to [0,\infty)\f$ and \f$g : \prod_{m\in M} X_m \to [0,\infty)\f$, then
+ * \f[f-g : \prod_{l\in L\cup M} X_l \to [0,\infty) : x \mapsto f(x_L) - g(x_M).\f]
*/
TFactor<T> operator- (const TFactor<T>& f) const {
-#ifdef DAI_DEBUG
- assert( f._vs == _vs );
-#endif
- TFactor<T> sum(*this);
- sum._p -= f._p;
- return sum;
+ return pointwiseOp(*this,f,std::minus<T>());
}
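+
+    /* For example (illustration only): if f is a TFactor<Real> defined on a
+       variable x0 and g is a TFactor<Real> defined on a different variable x1,
+       then f + g is a TFactor<Real> on {x0,x1} with entries f(x0) + g(x1).
+       Previously, operator+ and operator- required both factors to have
+       identical VarSets. */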
/// Returns marginal on ns, obtained by summing out all variables except those in ns, and normalizing the result if normed==true
TFactor<T> marginal(const VarSet & ns, bool normed=true) const;
+ /// Returns max-marginal on ns, obtained by maximizing all variables except those in ns, and normalizing the result if normed==true
+ TFactor<T> maxMarginal(const VarSet & ns, bool normed=true) const;
+
/// Embeds this factor in a larger VarSet
/** \pre vars() should be a subset of ns
*
if( _vs == ns )
return *this;
else
- return (*this) * TFactor<T>(ns / _vs, 1);
+ return (*this) * TFactor<T>(ns / _vs, (T)1);
}
/// Returns true if *this has NaN values
}
-template<typename T> TFactor<T> TFactor<T>::operator* (const TFactor<T>& f) const {
- if( f._vs == _vs ) { // optimizate special case
- TFactor<T> prod(*this);
- prod._p *= f._p;
- return prod;
- } else {
- TFactor<T> prod( _vs | f._vs, 0.0 );
+template<typename T> TFactor<T> TFactor<T>::maxMarginal(const VarSet & ns, bool normed) const {
+ VarSet res_ns = ns & _vs;
+
+ TFactor<T> res( res_ns, 0.0 );
- IndexFor i1(_vs, prod._vs);
- IndexFor i2(f._vs, prod._vs);
+ IndexFor i_res( res_ns, _vs );
+ for( size_t i = 0; i < _p.size(); i++, ++i_res )
+ if( _p[i] > res._p[i_res] )
+ res._p[i_res] = _p[i];
- for( size_t i = 0; i < prod._p.size(); i++, ++i1, ++i2 )
- prod._p[i] += _p[i1] * f._p[i2];
+ if( normed )
+ res.normalize( Prob::NORMPROB );
- return prod;
- }
+ return res;
}
-template<typename T> TFactor<T> TFactor<T>::operator/ (const TFactor<T>& f) const {
- if( f._vs == _vs ) { // optimizate special case
- TFactor<T> quot(*this);
- quot._p /= f._p;
- return quot;
+template<typename T, typename binaryOp> TFactor<T> pointwiseOp( const TFactor<T> &f, const TFactor<T> &g, binaryOp op ) {
+ if( f.vars() == g.vars() ) { // optimize special case
+ TFactor<T> result(f);
+ for( size_t i = 0; i < result.states(); i++ )
+ result[i] = op( result[i], g[i] );
+ return result;
} else {
- TFactor<T> quot( _vs | f._vs, 0.0 );
+ TFactor<T> result( f.vars() | g.vars(), 0.0 );
- IndexFor i1(_vs, quot._vs);
- IndexFor i2(f._vs, quot._vs);
+ IndexFor i1(f.vars(), result.vars());
+ IndexFor i2(g.vars(), result.vars());
- for( size_t i = 0; i < quot._p.size(); i++, ++i1, ++i2 )
- quot._p[i] += _p[i1] / f._p[i2];
+ for( size_t i = 0; i < result.states(); i++, ++i1, ++i2 )
+ result[i] = op( f[i1], g[i2] );
- return quot;
+ return result;
}
}
VarSet::const_iterator it = f.vars().begin();
Var i = *it; it++; Var j = *it;
TFactor<T> projection = f.marginal(i) * f.marginal(j);
- return real( dist( f.normalized(), projection, Prob::DISTKL ) );
+ return dist( f.normalized(), projection, Prob::DISTKL );
}
/// Clone *this (virtual copy constructor)
virtual FactorGraph* clone() const { return new FactorGraph(); }
- /// Create (virtual default constructor)
- virtual FactorGraph* create() const { return new FactorGraph(*this); }
-
/// Returns const reference to i'th variable
const Var & var(size_t i) const { return _vars[i]; }
/// Returns const reference to all factors
void ReadFromFile(const char *filename);
/// Writes a FactorGraph to a file
- void WriteToFile(const char *filename) const;
+ void WriteToFile(const char *filename, size_t precision=15) const;
/// Writes a FactorGraph to a GraphViz .dot file
void printDot( std::ostream& os ) const;
/// @name General InfAlg interface
//@{
virtual Gibbs* clone() const { return new Gibbs(*this); }
- virtual Gibbs* create() const { return new Gibbs(); }
virtual std::string identify() const { return std::string(Name) + printProperties(); }
virtual Factor belief( const Var &n ) const;
virtual Factor belief( const VarSet &ns ) const;
/// @name General InfAlg interface
//@{
virtual HAK* clone() const { return new HAK(*this); }
- virtual HAK* create() const { return new HAK(); }
virtual std::string identify() const;
virtual Factor belief( const Var &n ) const;
virtual Factor belief( const VarSet &ns ) const;
/// @name General InfAlg interface
//@{
virtual JTree* clone() const { return new JTree(*this); }
- virtual JTree* create() const { return new JTree(); }
virtual std::string identify() const;
virtual Factor belief( const Var &n ) const;
virtual Factor belief( const VarSet &ns ) const;
/// @name General InfAlg interface
//@{
virtual LC* clone() const { return new LC(*this); }
- virtual LC* create() const { return new LC(); }
virtual std::string identify() const;
virtual Factor belief( const Var &n ) const { return( _beliefs[findVar(n)] ); }
virtual Factor belief( const VarSet &/*ns*/ ) const { DAI_THROW(NOT_IMPLEMENTED); return Factor(); }
/// @name General InfAlg interface
//@{
virtual MF* clone() const { return new MF(*this); }
- virtual MF* create() const { return new MF(); }
virtual std::string identify() const;
virtual Factor belief( const Var &n ) const;
virtual Factor belief( const VarSet &ns ) const;
/// @name General InfAlg interface
//@{
virtual MR* clone() const { return new MR(*this); }
- virtual MR* create() const { return new MR(); }
virtual std::string identify() const;
virtual Factor belief( const Var &n ) const;
virtual Factor belief( const VarSet &/*ns*/ ) const { DAI_THROW(NOT_IMPLEMENTED); return Factor(); }
arg = i;
}
}
- return make_pair(arg,max);
+ return std::make_pair(arg,max);
}
/// Normalizes vector using the specified norm
/// Clone *this (virtual copy constructor)
virtual RegionGraph* clone() const { return new RegionGraph(*this); }
- /// Create (virtual default constructor)
- virtual RegionGraph* create() const { return new RegionGraph(); }
-
/// Set the content of the I'th factor and make a backup of its old content if backup == true
virtual void setFactor( size_t I, const Factor &newFactor, bool backup = false ) {
FactorGraph::setFactor( I, newFactor, backup );
/// @name General InfAlg interface
//@{
virtual TreeEP* clone() const { return new TreeEP(*this); }
- virtual TreeEP* create() const { return new TreeEP(); }
virtual std::string identify() const;
virtual Real logZ() const;
virtual void init();
};
+/// Split a string into tokens
+void tokenizeString( const std::string& s,
+ std::vector<std::string>& outTokens,
+ const std::string& delim="\t\n" );
+
+
} // end of namespace dai
#include <set>
#include <cassert>
#include <limits>
+#include <climits> // Work-around for bug in boost graph library
#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/prim_minimum_spanning_tree.hpp>
/** Uses implementation in Boost Graph Library.
*/
template<typename T> DEdgeVec MaxSpanningTreePrims( const WeightedGraph<T> & Graph ) {
- T maxweight = Graph.begin()->second;
- for( typename WeightedGraph<T>::const_iterator it = Graph.begin(); it != Graph.end(); it++ )
- if( it->second > maxweight )
- maxweight = it->second;
- // make a copy of the graph
- WeightedGraph<T> gr( Graph );
- // invoke MinSpanningTreePrims with negative weights
- // (which have to be shifted to satisfy positivity criterion)
- for( typename WeightedGraph<T>::iterator it = gr.begin(); it != gr.end(); it++ )
- it->second = maxweight - it->second;
- return MinSpanningTreePrims( gr );
+ if( Graph.size() == 0 )
+ return DEdgeVec();
+ else {
+ T maxweight = Graph.begin()->second;
+ for( typename WeightedGraph<T>::const_iterator it = Graph.begin(); it != Graph.end(); it++ )
+ if( it->second > maxweight )
+ maxweight = it->second;
+ // make a copy of the graph
+ WeightedGraph<T> gr( Graph );
+ // invoke MinSpanningTreePrims with negative weights
+ // (which have to be shifted to satisfy positivity criterion)
+ for( typename WeightedGraph<T>::iterator it = gr.begin(); it != gr.end(); it++ )
+ it->second = maxweight - it->second;
+ return MinSpanningTreePrims( gr );
+ }
}
void BP::findMaxResidual( size_t &i, size_t &_I ) {
-/*
- i = 0;
- _I = 0;
- double maxres = residual( i, _I );
- for( size_t j = 0; j < nrVars(); ++j )
- foreach( const Neighbor &I, nbV(j) )
- if( residual( j, I.iter ) > maxres ) {
- i = j;
- _I = I.iter;
- maxres = residual( i, _I );
- }
-*/
assert( !_lut.empty() );
LutType::const_iterator largestEl = _lut.end();
--largestEl;
// calculate updated message I->i
size_t I = nbV(i,_I);
- if( !DAI_BP_FAST ) {
- /* UNOPTIMIZED (SIMPLE TO READ, BUT SLOW) VERSION */
- Factor prod( factor( I ) );
- foreach( const Neighbor &j, nbF(I) )
- if( j != i ) { // for all j in I \ i
- foreach( const Neighbor &J, nbV(j) )
- if( J != I ) { // for all J in nb(j) \ I
- prod *= Factor( var(j), message(j, J.iter) );
- }
- }
- newMessage(i,_I) = prod.marginal( var(i) ).p();
- } else {
- /* OPTIMIZED VERSION */
- Prob prod( factor(I).p() );
- if( props.logdomain )
- prod.takeLog();
+ Factor Fprod( factor(I) );
+ Prob &prod = Fprod.p();
+ if( props.logdomain )
+ prod.takeLog();
+
+ // Calculate product of incoming messages and factor I
+ foreach( const Neighbor &j, nbF(I) )
+ if( j != i ) { // for all j in I \ i
+ // prod_j will be the product of messages coming into j
+ Prob prod_j( var(j).states(), props.logdomain ? 0.0 : 1.0 );
+ foreach( const Neighbor &J, nbV(j) )
+ if( J != I ) { // for all J in nb(j) \ I
+ if( props.logdomain )
+ prod_j += message( j, J.iter );
+ else
+ prod_j *= message( j, J.iter );
+ }
- // Calculate product of incoming messages and factor I
- foreach( const Neighbor &j, nbF(I) ) {
- if( j != i ) { // for all j in I \ i
+ // multiply prod with prod_j
+ if( !DAI_BP_FAST ) {
+ /* UNOPTIMIZED (SIMPLE TO READ, BUT SLOW) VERSION */
+ if( props.logdomain )
+ Fprod += Factor( var(j), prod_j );
+ else
+ Fprod *= Factor( var(j), prod_j );
+ } else {
+ /* OPTIMIZED VERSION */
size_t _I = j.dual;
// ind is the precalculated IndexFor(j,I) i.e. to x_I == k corresponds x_j == ind[k]
const ind_t &ind = index(j, _I);
-
- // prod_j will be the product of messages coming into j
- Prob prod_j( var(j).states(), props.logdomain ? 0.0 : 1.0 );
- foreach( const Neighbor &J, nbV(j) )
- if( J != I ) { // for all J in nb(j) \ I
- if( props.logdomain )
- prod_j += message( j, J.iter );
- else
- prod_j *= message( j, J.iter );
- }
-
- // multiply prod with prod_j
for( size_t r = 0; r < prod.size(); ++r )
if( props.logdomain )
prod[r] += prod_j[ind[r]];
prod[r] *= prod_j[ind[r]];
}
}
- if( props.logdomain ) {
- prod -= prod.max();
- prod.takeExp();
- }
- // Marginalize onto i
- Prob marg( var(i).states(), 0.0 );
+ if( props.logdomain ) {
+ prod -= prod.max();
+ prod.takeExp();
+ }
+
+ // Marginalize onto i
+ Prob marg;
+ if( !DAI_BP_FAST ) {
+ /* UNOPTIMIZED (SIMPLE TO READ, BUT SLOW) VERSION */
+ if( props.inference == Properties::InfType::SUMPROD )
+ marg = Fprod.marginal( var(i) ).p();
+ else
+ marg = Fprod.maxMarginal( var(i) ).p();
+ } else {
+ /* OPTIMIZED VERSION */
+ marg = Prob( var(i).states(), 0.0 );
// ind is the precalculated IndexFor(i,I) i.e. to x_I == k corresponds x_i == ind[k]
const ind_t ind = index(i,_I);
if( props.inference == Properties::InfType::SUMPROD )
if( prod[r] > marg[ind[r]] )
marg[ind[r]] = prod[r];
marg.normalize();
-
- // Store result
- if( props.logdomain )
- newMessage(i,_I) = marg.log();
- else
- newMessage(i,_I) = marg;
}
+ // Store result
+ if( props.logdomain )
+ newMessage(i,_I) = marg.log();
+ else
+ newMessage(i,_I) = marg;
+
// Update the residual if necessary
if( props.updates == Properties::UpdateType::SEQMAX )
updateResidual( i, _I , dist( newMessage( i, _I ), message( i, _I ), Prob::DISTLINF ) );
void BP::calcBeliefF( size_t I, Prob &p ) const {
- p = factor(I).p();
- if( props.logdomain )
- p.takeLog();
+ Factor Fprod( factor( I ) );
+ Prob &prod = Fprod.p();
- foreach( const Neighbor &j, nbF(I) ) {
- size_t _I = j.dual;
- // ind is the precalculated IndexFor(j,I) i.e. to x_I == k corresponds x_j == ind[k]
- const ind_t & ind = index(j, _I);
+ if( props.logdomain )
+ prod.takeLog();
+ foreach( const Neighbor &j, nbF(I) ) {
// prod_j will be the product of messages coming into j
- Prob prod_j( var(j).states(), props.logdomain ? 0.0 : 1.0 );
- foreach( const Neighbor &J, nbV(j) ) {
+ Prob prod_j( var(j).states(), props.logdomain ? 0.0 : 1.0 );
+ foreach( const Neighbor &J, nbV(j) )
if( J != I ) { // for all J in nb(j) \ I
if( props.logdomain )
prod_j += newMessage( j, J.iter );
else
prod_j *= newMessage( j, J.iter );
}
- }
- // multiply p with prod_j
- for( size_t r = 0; r < p.size(); ++r ) {
+ // multiply prod with prod_j
+ if( !DAI_BP_FAST ) {
+ /* UNOPTIMIZED (SIMPLE TO READ, BUT SLOW) VERSION */
if( props.logdomain )
- p[r] += prod_j[ind[r]];
+ Fprod += Factor( var(j), prod_j );
else
- p[r] *= prod_j[ind[r]];
+ Fprod *= Factor( var(j), prod_j );
+ } else {
+ /* OPTIMIZED VERSION */
+ size_t _I = j.dual;
+ // ind is the precalculated IndexFor(j,I) i.e. to x_I == k corresponds x_j == ind[k]
+ const ind_t & ind = index(j, _I);
+
+ for( size_t r = 0; r < prod.size(); ++r ) {
+ if( props.logdomain )
+ prod[r] += prod_j[ind[r]];
+ else
+ prod[r] *= prod_j[ind[r]];
+ }
}
}
+
+ p = prod;
}
p -= p.max();
p.takeExp();
}
-
p.normalize();
+
return( Factor( var(i), p ) );
}
+Factor BP::beliefF( size_t I ) const {
+ Prob p;
+ calcBeliefF( I, p );
+
+ if( props.logdomain ) {
+ p -= p.max();
+ p.takeExp();
+ }
+ p.normalize();
+
+ return( Factor( factor(I).vars(), p ) );
+}
+
+
Factor BP::belief( const Var &n ) const {
return( beliefV( findVar( n ) ) );
}
}
-Factor BP::beliefF( size_t I ) const {
- if( !DAI_BP_FAST ) {
- /* UNOPTIMIZED (SIMPLE TO READ, BUT SLOW) VERSION */
-
- Factor prod( factor(I) );
- foreach( const Neighbor &j, nbF(I) ) {
- foreach( const Neighbor &J, nbV(j) ) {
- if( J != I ) // for all J in nb(j) \ I
- prod *= Factor( var(j), newMessage(j, J.iter) );
- }
- }
- return prod.normalized();
- } else {
- /* OPTIMIZED VERSION */
-
- Prob prod;
- calcBeliefF( I, prod );
-
- if( props.logdomain ) {
- prod -= prod.max();
- prod.takeExp();
- }
- prod.normalize();
-
- Factor result( factor(I).vars(), prod );
-
- return( result );
- }
-}
-
-
Real BP::logZ() const {
Real sum = 0.0;
for(size_t i = 0; i < nrVars(); ++i )
--- /dev/null
+/* Copyright (C) 2009 Charles Vaske [cvaske at soe dot ucsc dot edu]
+ University of California Santa Cruz
+
+ This file is part of libDAI.
+
+ libDAI is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ libDAI is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with libDAI; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+
+#include <dai/util.h>
+#include <dai/emalg.h>
+
+
+namespace dai {
+
+
+std::map<std::string, ParameterEstimation::ParamEstFactory> *ParameterEstimation::_registry = NULL;
+
+
+void ParameterEstimation::loadDefaultRegistry() {
+ _registry = new std::map<std::string, ParamEstFactory>();
+ (*_registry)["ConditionalProbEstimation"] = CondProbEstimation::factory;
+}
+
+
+ParameterEstimation* ParameterEstimation::construct( const std::string &method, const PropertySet &p ) {
+ if( _registry == NULL )
+ loadDefaultRegistry();
+ std::map<std::string, ParamEstFactory>::iterator i = _registry->find(method);
+ if( i == _registry->end() )
+ DAI_THROW(UNKNOWN_PARAMETER_ESTIMATION_METHOD);
+ ParamEstFactory factory = i->second;
+ return factory(p);
+}
+
+
+ParameterEstimation* CondProbEstimation::factory( const PropertySet &p ) {
+ size_t target_dimension = p.getStringAs<size_t>("target_dim");
+ size_t total_dimension = p.getStringAs<size_t>("total_dim");
+ Real pseudo_count = 1;
+ if( p.hasKey("pseudo_count") )
+ pseudo_count = p.getStringAs<Real>("pseudo_count");
+ return new CondProbEstimation( target_dimension, Prob( total_dimension, pseudo_count ) );
+}
+
+
+CondProbEstimation::CondProbEstimation( size_t target_dimension, const Prob &pseudocounts )
+ : _target_dim(target_dimension), _stats(pseudocounts), _initial_stats(pseudocounts)
+{
+ if( _stats.size() % _target_dim )
+ DAI_THROW(MALFORMED_PROPERTY);
+}
+
+
+void CondProbEstimation::addSufficientStatistics( const Prob &p ) {
+ _stats += p;
+}
+
+
+Prob CondProbEstimation::estimate() {
+ // normalize pseudocounts
+ for( size_t parent = 0; parent < _stats.size(); parent += _target_dim ) {
+ // calculate norm
+ Real norm = 0.0;
+ size_t top = parent + _target_dim;
+ for( size_t i = parent; i < top; ++i )
+ norm += _stats[i];
+ if( norm != 0.0 )
+ norm = 1.0 / norm;
+ // normalize
+ for( size_t i = parent; i < top; ++i )
+ _stats[i] *= norm;
+ }
+ // reset _stats to _initial_stats
+ Prob result = _stats;
+ _stats = _initial_stats;
+ return result;
+}
+
+
+Permute SharedParameters::calculatePermutation( const std::vector<Var> &varorder, VarSet &outVS ) {
+ // Collect all labels and dimensions, and order them in vs
+ std::vector<size_t> dims;
+ dims.reserve( varorder.size() );
+ std::vector<long> labels;
+ labels.reserve( varorder.size() );
+ for( size_t i = 0; i < varorder.size(); i++ ) {
+ dims.push_back( varorder[i].states() );
+ labels.push_back( varorder[i].label() );
+ outVS |= varorder[i];
+ }
+
+ // Construct the sigma array for the permutation object
+ std::vector<size_t> sigma;
+ sigma.reserve( dims.size() );
+ for( VarSet::iterator set_iterator = outVS.begin(); sigma.size() < dims.size(); ++set_iterator )
+ sigma.push_back( find(labels.begin(), labels.end(), set_iterator->label()) - labels.begin() );
+
+ return Permute( dims, sigma );
+}
+
+
+void SharedParameters::setPermsAndVarSetsFromVarOrders() {
+ if( _varorders.size() == 0 )
+ return;
+ if( _estimation == NULL )
+ DAI_THROW(INVALID_SHARED_PARAMETERS_ORDER);
+
+ // Construct the permutation objects and the varsets
+ for( FactorOrientations::const_iterator foi = _varorders.begin(); foi != _varorders.end(); ++foi ) {
+ VarSet vs;
+ _perms[foi->first] = calculatePermutation( foi->second, vs );
+ _varsets[foi->first] = vs;
+ if( _estimation->probSize() != vs.nrStates() )
+ DAI_THROW(INVALID_SHARED_PARAMETERS_ORDER);
+ }
+}
+
+
+SharedParameters::SharedParameters( std::istream &is, const FactorGraph &fg_varlookup )
+ : _varsets(), _perms(), _varorders(), _estimation(NULL), _deleteEstimation(true)
+{
+ // Read the desired parameter estimation method from the stream
+ std::string est_method;
+ PropertySet props;
+ is >> est_method;
+ is >> props;
+
+ // Construct a corresponding object
+ _estimation = ParameterEstimation::construct( est_method, props );
+
+ // Read in the factors that are to be estimated
+ size_t num_factors;
+ is >> num_factors;
+ for( size_t sp_i = 0; sp_i < num_factors; ++sp_i ) {
+ std::string line;
+ while( line.size() == 0 && getline(is, line) )
+ ;
+
+ std::vector<std::string> fields;
+ tokenizeString(line, fields, " \t");
+
+ // Lookup the factor in the factorgraph
+ if( fields.size() < 1 )
+ DAI_THROW(INVALID_SHARED_PARAMETERS_INPUT_LINE);
+ std::istringstream iss;
+ iss.str( fields[0] );
+ size_t factor;
+ iss >> factor;
+ const VarSet &vs = fg_varlookup.factor(factor).vars();
+ if( fields.size() != vs.size() + 1 )
+ DAI_THROW(INVALID_SHARED_PARAMETERS_INPUT_LINE);
+
+ // Construct the vector of Vars
+ std::vector<Var> var_order;
+ var_order.reserve( vs.size() );
+ for( size_t fi = 1; fi < fields.size(); ++fi ) {
+ // Lookup a single variable by label
+ long label;
+ std::istringstream labelparse( fields[fi] );
+ labelparse >> label;
+ VarSet::const_iterator vsi = vs.begin();
+ for( ; vsi != vs.end(); ++vsi )
+ if( vsi->label() == label )
+ break;
+ if( vsi == vs.end() )
+ DAI_THROW(INVALID_SHARED_PARAMETERS_INPUT_LINE);
+ var_order.push_back( *vsi );
+ }
+ _varorders[factor] = var_order;
+ }
+
+ // Calculate the necessary permutations
+ setPermsAndVarSetsFromVarOrders();
+}
+
+
+SharedParameters::SharedParameters( const SharedParameters &sp )
+ : _varsets(sp._varsets), _perms(sp._perms), _varorders(sp._varorders), _estimation(sp._estimation), _deleteEstimation(sp._deleteEstimation)
+{
+ // If sp owns its _estimation object, we should clone it instead
+ if( _deleteEstimation )
+ _estimation = _estimation->clone();
+}
+
+
+SharedParameters::SharedParameters( const FactorOrientations &varorders, ParameterEstimation *estimation )
+ : _varsets(), _perms(), _varorders(varorders), _estimation(estimation), _deleteEstimation(false)
+{
+ // Calculate the necessary permutations
+ setPermsAndVarSetsFromVarOrders();
+}
+
+
+void SharedParameters::collectSufficientStatistics( InfAlg &alg ) {
+ for( std::map< FactorIndex, Permute >::iterator i = _perms.begin(); i != _perms.end(); ++i ) {
+ Permute &perm = i->second;
+ VarSet &vs = _varsets[i->first];
+
+ Factor b = alg.belief(vs);
+ Prob p( b.states(), 0.0 );
+ for( size_t entry = 0; entry < b.states(); ++entry )
+ p[entry] = b[perm.convert_linear_index(entry)];
+ _estimation->addSufficientStatistics( p );
+ }
+}
+
+
+void SharedParameters::setParameters( FactorGraph &fg ) {
+ Prob p = _estimation->estimate();
+ for( std::map<FactorIndex, Permute>::iterator i = _perms.begin(); i != _perms.end(); ++i ) {
+ Permute &perm = i->second;
+ VarSet &vs = _varsets[i->first];
+
+ Factor f( vs, 0.0 );
+ for( size_t entry = 0; entry < f.states(); ++entry )
+ f[perm.convert_linear_index(entry)] = p[entry];
+
+ fg.setFactor( i->first, f );
+ }
+}
+
+
+MaximizationStep::MaximizationStep( std::istream &is, const FactorGraph &fg_varlookup ) : _params() {
+ size_t num_params = -1;
+ is >> num_params;
+ _params.reserve( num_params );
+ for( size_t i = 0; i < num_params; ++i )
+ _params.push_back( SharedParameters( is, fg_varlookup ) );
+}
+
+
+void MaximizationStep::addExpectations( InfAlg &alg ) {
+ for( size_t i = 0; i < _params.size(); ++i )
+ _params[i].collectSufficientStatistics( alg );
+}
+
+
+void MaximizationStep::maximize( FactorGraph &fg ) {
+ for( size_t i = 0; i < _params.size(); ++i )
+ _params[i].setParameters( fg );
+}
+
+
+const std::string EMAlg::MAX_ITERS_KEY("max_iters");
+const std::string EMAlg::LOG_Z_TOL_KEY("log_z_tol");
+const size_t EMAlg::MAX_ITERS_DEFAULT = 30;
+const Real EMAlg::LOG_Z_TOL_DEFAULT = 0.01;
+
+
+EMAlg::EMAlg( const Evidence &evidence, InfAlg &estep, std::istream &msteps_file )
+ : _evidence(evidence), _estep(estep), _msteps(), _iters(0), _lastLogZ(), _max_iters(MAX_ITERS_DEFAULT), _log_z_tol(LOG_Z_TOL_DEFAULT)
+{
+ msteps_file.exceptions( std::istream::eofbit | std::istream::failbit | std::istream::badbit );
+ size_t num_msteps = -1;
+ msteps_file >> num_msteps;
+ _msteps.reserve(num_msteps);
+ for( size_t i = 0; i < num_msteps; ++i )
+ _msteps.push_back( MaximizationStep( msteps_file, estep.fg() ) );
+}
+
+
+void EMAlg::setTermConditions( const PropertySet &p ) {
+ if( p.hasKey(MAX_ITERS_KEY) )
+ _max_iters = p.getStringAs<size_t>(MAX_ITERS_KEY);
+ if( p.hasKey(LOG_Z_TOL_KEY) )
+ _log_z_tol = p.getStringAs<Real>(LOG_Z_TOL_KEY);
+}
+
+
+bool EMAlg::hasSatisfiedTermConditions() const {
+ if( _iters >= _max_iters )
+ return true;
+ else if( _lastLogZ.size() < 3 )
+ // need at least 2 to calculate ratio
+ // Also, throw away first iteration, as the parameters may not
+ // have been normalized according to the estimation method
+ return false;
+ else {
+ Real current = _lastLogZ[_lastLogZ.size() - 1];
+ Real previous = _lastLogZ[_lastLogZ.size() - 2];
+ if( previous == 0 )
+ return false;
+ Real diff = current - previous;
+ if( diff < 0 ) {
+ std::cerr << "Error: in EM log-likehood decreased from " << previous << " to " << current << std::endl;
+ return true;
+ }
+ return diff / std::fabs(previous) <= _log_z_tol;
+ }
+}
+
+
+Real EMAlg::iterate( MaximizationStep &mstep ) {
+ Real logZ = 0;
+
+ // Expectation calculation
+ for( Evidence::const_iterator e = _evidence.begin(); e != _evidence.end(); ++e ) {
+ InfAlg* clamped = _estep.clone();
+ e->applyEvidence( *clamped );
+ clamped->init();
+ clamped->run();
+
+ logZ += clamped->logZ();
+
+ mstep.addExpectations( *clamped );
+
+ delete clamped;
+ }
+
+ // Maximization of parameters
+ mstep.maximize( _estep.fg() );
+
+ return logZ;
+}
+
+
+Real EMAlg::iterate() {
+ Real likelihood;
+ for( size_t i = 0; i < _msteps.size(); ++i )
+ likelihood = iterate( _msteps[i] );
+ _lastLogZ.push_back( likelihood );
+ ++_iters;
+ return likelihood;
+}
+
+
+void EMAlg::run() {
+ while( !hasSatisfiedTermConditions() )
+ iterate();
+}
+
+
+} // end of namespace dai
--- /dev/null
+/* Copyright (C) 2009 Charles Vaske [cvaske at soe dot ucsc dot edu]
+ University of California Santa Cruz
+
+ This file is part of libDAI.
+
+ libDAI is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ libDAI is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with libDAI; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+
+#include <sstream>
+#include <string>
+#include <cstdlib>
+
+#include <dai/util.h>
+#include <dai/evidence.h>
+
+
+namespace dai {
+
+
+void Observation::addObservation( Var node, size_t setting ) {
+ _obs[node] = setting;
+}
+
+
+void Observation::applyEvidence( InfAlg &alg ) const {
+ for( std::map<Var, size_t>::const_iterator i = _obs.begin(); i != _obs.end(); ++i )
+ alg.clamp( i->first, i->second );
+}
+
+
+void Evidence::addEvidenceTabFile( std::istream &is, FactorGraph &fg ) {
+ std::map<std::string, Var> varMap;
+ for( std::vector<Var>::const_iterator v = fg.vars().begin(); v != fg.vars().end(); ++v ) {
+ std::stringstream s;
+ s << v->label();
+ varMap[s.str()] = *v;
+ }
+
+ addEvidenceTabFile( is, varMap );
+}
+
+
+void Evidence::addEvidenceTabFile( std::istream &is, std::map<std::string, Var> &varMap ) {
+ std::string line;
+ getline( is, line );
+
+ // Parse header
+ std::vector<std::string> header_fields;
+ tokenizeString( line, header_fields );
+ std::vector<std::string>::const_iterator p_field = header_fields.begin();
+ if( p_field == header_fields.end() )
+ DAI_THROW(INVALID_EVIDENCE_LINE);
+
+ std::vector<Var> vars;
+ for( ; p_field != header_fields.end(); ++p_field ) {
+ std::map<std::string, Var>::iterator elem = varMap.find( *p_field );
+ if( elem == varMap.end() )
+ DAI_THROW(INVALID_EVIDENCE_FILE);
+ vars.push_back( elem->second );
+ }
+
+ // Read samples
+ while( getline(is, line) ) {
+ std::vector<std::string> fields;
+ tokenizeString( line, fields );
+ if( fields.size() != vars.size() )
+ DAI_THROW(INVALID_EVIDENCE_LINE);
+
+ Observation sampleData;
+ for( size_t i = 0; i < vars.size(); ++i ) {
+ if( fields[i].size() > 0 ) { // skip if missing observation
+ if( fields[i].find_first_not_of("0123456789") != std::string::npos )
+ DAI_THROW(INVALID_EVIDENCE_OBSERVATION);
+ size_t state = atoi( fields[i].c_str() );
+ if( state >= vars[i].states() )
+ DAI_THROW(INVALID_EVIDENCE_OBSERVATION);
+ sampleData.addObservation( vars[i], state );
+ }
+ }
+ _samples.push_back( sampleData );
+ } // finished sample line
+}
+
+
+} // end of namespace dai
"FactorGraph is not connected",
"Impossible typecast",
"Internal error",
- "Quantity not normalizable"
+ "Quantity not normalizable",
+ "Cannot parse Evidence file",
+ "Cannot parse Evidence line",
+ "Invalid observation in Evidence file",
+ "Invalid variable order in SharedParameters",
+ "Variable order line in EM file is invalid",
+ "Unrecognized parameter estimation method"
};
#include <iostream>
+#include <iomanip>
#include <iterator>
#include <map>
#include <set>
nr_nonzeros++;
os << nr_nonzeros << endl;
for( size_t k = 0; k < fg.factor(I).states(); k++ )
- if( fg.factor(I)[k] != 0.0 ) {
- char buf[20];
- sprintf(buf,"%18.14g", fg.factor(I)[k]);
- os << k << " " << buf << endl;
- }
+ if( fg.factor(I)[k] != 0.0 )
+ os << k << " " << setw(os.precision()+4) << fg.factor(I)[k] << endl;
}
return(os);
}
-void FactorGraph::WriteToFile( const char *filename ) const {
+void FactorGraph::WriteToFile( const char *filename, size_t precision ) const {
ofstream outfile;
outfile.open( filename );
if( outfile.is_open() ) {
+ outfile.precision( precision );
outfile << *this;
outfile.close();
} else
* *
* This is a MEX-file for MATLAB. *
* *
- * [logZ,q,md] = dai(psi,method,opts); *
+ * [logZ,q,md,qv,qf] = dai(psi,method,opts); *
* *
*=================================================================*/
#define METHOD_IN prhs[1]
#define OPTS_IN prhs[2]
#define NR_IN 3
+#define NR_IN_OPT 0
/* Output Arguments */
#define LOGZ_OUT plhs[0]
#define Q_OUT plhs[1]
#define MD_OUT plhs[2]
+#define QV_OUT plhs[3]
+#define QF_OUT plhs[4]
#define NR_OUT 3
+#define NR_OUT_OPT 2
void mexFunction( int nlhs, mxArray *plhs[], int nrhs, const mxArray*prhs[] )
{
size_t buflen;
- /* Check for proper number of arguments */
- if( (nrhs != NR_IN) || (nlhs != NR_OUT) ) {
- mexErrMsgTxt("Usage: [logZ,q,md] = dai(psi,method,opts)\n\n"
+ // Check for proper number of arguments
+ if( ((nrhs < NR_IN) || (nrhs > NR_IN + NR_IN_OPT)) || ((nlhs < NR_OUT) || (nlhs > NR_OUT + NR_OUT_OPT)) ) {
+ mexErrMsgTxt("Usage: [logZ,q,md,qv,qf] = dai(psi,method,opts)\n\n"
"\n"
"INPUT: psi = linear cell array containing the factors \n"
" psi{i} should be a structure with a Member field\n"
"\n"
"OUTPUT: logZ = approximation of the logarithm of the partition sum.\n"
" q = linear cell array containing all final beliefs.\n"
- " md = maxdiff (final linf-dist between new and old single node beliefs).\n");
+ " md = maxdiff (final linf-dist between new and old single node beliefs).\n"
+ " qv = linear cell array containing all variable beliefs.\n"
+ " qf = linear cell array containing all factor beliefs.\n");
}
char *method;
MD_OUT = mxCreateDoubleMatrix(1,1,mxREAL);
*(mxGetPr(MD_OUT)) = maxdiff;
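+ // Optionally return all variable beliefs (qv) and all factor beliefs (qf)
+ // when the caller requests a fourth and fifth output argument.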
+ if( nlhs >= 4 ) {
+ vector<Factor> qv;
+ qv.reserve( fg.nrVars() );
+ for( size_t i = 0; i < fg.nrVars(); i++ )
+ qv.push_back( obj->belief( fg.var(i) ) );
+ QV_OUT = Factors2mx( qv );
+ }
+
+ if( nlhs >= 5 ) {
+ vector<Factor> qf;
+ qf.reserve( fg.nrFactors() );
+ for( size_t I = 0; I < fg.nrFactors(); I++ )
+ qf.push_back( obj->belief( fg.factor(I).vars() ) );
+ QF_OUT = Factors2mx( qf );
+ }
return;
}
return (int)floor(_uni_rnd() * (max + 1 - min) + min);
}
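+// Splits s on any of the characters in delim and appends the resulting
+// tokens to outTokens; delimiters are not included in the tokens, and
+// consecutive delimiters yield empty tokens.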
+void tokenizeString(const std::string& s,
+ std::vector<std::string>& outTokens,
+ const std::string& delim)
+{
+ size_t start = 0;
+ while (start < s.size()) {
+ size_t end = s.find_first_of(delim, start);
+ if (end > s.size())
+ end = s.size();
+ outTokens.push_back(s.substr(start, end - start));
+ start = end + 1;
+ }
+}
} // end of namespace dai
cout << "error and relative logZ error (comparing with the results of" << endl;
cout << "<method0>, the base method, for which one can use JTREE_HUGIN)." << endl << endl;
cout << opts_required << opts_optional << endl;
+#ifdef DAI_DEBUG
+ cout << "This is a debugging (unoptimised) build of libDAI." << endl;
+#endif
return 1;
}
- // Read aliases
- map<string,string> Aliases;
- if( !aliases.empty() ) {
- ifstream infile;
- infile.open (aliases.c_str());
- if (infile.is_open()) {
- while( true ) {
- string line;
- getline(infile,line);
- if( infile.fail() )
- break;
- if( (!line.empty()) && (line[0] != '#') ) {
- string::size_type pos = line.find(':',0);
- if( pos == string::npos )
- throw "Invalid alias";
- else {
- string::size_type posl = line.substr(0, pos).find_last_not_of(" \t");
- string key = line.substr(0, posl + 1);
- string::size_type posr = line.substr(pos + 1, line.length()).find_first_not_of(" \t");
- string val = line.substr(pos + 1 + posr, line.length());
- Aliases[key] = val;
+ try {
+ // Read aliases
+ map<string,string> Aliases;
+ if( !aliases.empty() ) {
+ ifstream infile;
+ infile.open (aliases.c_str());
+ if (infile.is_open()) {
+ while( true ) {
+ string line;
+ getline(infile,line);
+ if( infile.fail() )
+ break;
+ if( (!line.empty()) && (line[0] != '#') ) {
+ string::size_type pos = line.find(':',0);
+ if( pos == string::npos )
+ throw string("Invalid alias");
+ else {
+ string::size_type posl = line.substr(0, pos).find_last_not_of(" \t");
+ string key = line.substr(0, posl + 1);
+ string::size_type posr = line.substr(pos + 1, line.length()).find_first_not_of(" \t");
+ string val = line.substr(pos + 1 + posr, line.length());
+ Aliases[key] = val;
+ }
}
}
- }
- infile.close();
- } else
- throw "Error opening aliases file";
- }
-
- FactorGraph fg;
- fg.ReadFromFile( filename.c_str() );
-
- vector<Factor> q0;
- double logZ0 = 0.0;
-
- cout.setf( ios_base::scientific );
- cout.precision( 3 );
-
- cout << "# " << filename << endl;
- cout.width( 39 );
- cout << left << "# METHOD" << "\t";
- if( report_time )
- cout << right << "SECONDS " << "\t";
- if( report_iters )
- cout << "ITERS" << "\t";
- cout << "MAX ERROR" << "\t";
- cout << "AVG ERROR" << "\t";
- cout << "LOGZ ERROR" << "\t";
- cout << "MAXDIFF" << "\t";
- cout << endl;
-
- for( size_t m = 0; m < methods.size(); m++ ) {
- pair<string, PropertySet> meth = parseMethod( methods[m], Aliases );
-
- if( vm.count("tol") )
- meth.second.Set("tol",tol);
- if( vm.count("maxiter") )
- meth.second.Set("maxiter",maxiter);
- if( vm.count("verbose") )
- meth.second.Set("verbose",verbose);
- TestDAI piet(fg, meth.first, meth.second );
- piet.doDAI();
- if( m == 0 ) {
- q0 = piet.q;
- logZ0 = piet.logZ;
+ infile.close();
+ } else
+ throw string("Error opening aliases file");
}
- piet.calcErrs(q0);
+ FactorGraph fg;
+ fg.ReadFromFile( filename.c_str() );
+
+ vector<Factor> q0;
+ double logZ0 = 0.0;
+
+ cout.setf( ios_base::scientific );
+ cout.precision( 3 );
+
+ cout << "# " << filename << endl;
cout.width( 39 );
- cout << left << methods[m] << "\t";
+ cout << left << "# METHOD" << "\t";
if( report_time )
- cout << right << piet.time << "\t";
- if( report_iters ) {
- if( piet.has_iters ) {
- cout << piet.iters << "\t";
- } else {
- cout << "N/A \t";
+ cout << right << "SECONDS " << "\t";
+ if( report_iters )
+ cout << "ITERS" << "\t";
+ cout << "MAX ERROR" << "\t";
+ cout << "AVG ERROR" << "\t";
+ cout << "LOGZ ERROR" << "\t";
+ cout << "MAXDIFF" << "\t";
+ cout << endl;
+
+ for( size_t m = 0; m < methods.size(); m++ ) {
+ pair<string, PropertySet> meth = parseMethod( methods[m], Aliases );
+
+ if( vm.count("tol") )
+ meth.second.Set("tol",tol);
+ if( vm.count("maxiter") )
+ meth.second.Set("maxiter",maxiter);
+ if( vm.count("verbose") )
+ meth.second.Set("verbose",verbose);
+ TestDAI piet(fg, meth.first, meth.second );
+ piet.doDAI();
+ if( m == 0 ) {
+ q0 = piet.q;
+ logZ0 = piet.logZ;
+ }
+ piet.calcErrs(q0);
+
+ cout.width( 39 );
+ cout << left << methods[m] << "\t";
+ if( report_time )
+ cout << right << piet.time << "\t";
+ if( report_iters ) {
+ if( piet.has_iters ) {
+ cout << piet.iters << "\t";
+ } else {
+ cout << "N/A \t";
+ }
}
- }
- if( m > 0 ) {
- cout.setf( ios_base::scientific );
- cout.precision( 3 );
-
- double me = clipdouble( piet.maxErr(), 1e-9 );
- cout << me << "\t";
-
- double ae = clipdouble( piet.avgErr(), 1e-9 );
- cout << ae << "\t";
-
- if( piet.has_logZ ) {
- cout.setf( ios::showpos );
- double le = clipdouble( piet.logZ / logZ0 - 1.0, 1e-9 );
- cout << le << "\t";
- cout.unsetf( ios::showpos );
- } else
- cout << "N/A \t";
-
- if( piet.has_maxdiff ) {
- double md = clipdouble( piet.maxdiff, 1e-9 );
- if( isnan( me ) )
- md = me;
- if( isnan( ae ) )
- md = ae;
- cout << md << "\t";
- } else
- cout << "N/A \t";
- }
- cout << endl;
+ if( m > 0 ) {
+ cout.setf( ios_base::scientific );
+ cout.precision( 3 );
+
+ double me = clipdouble( piet.maxErr(), 1e-9 );
+ cout << me << "\t";
+
+ double ae = clipdouble( piet.avgErr(), 1e-9 );
+ cout << ae << "\t";
+
+ if( piet.has_logZ ) {
+ cout.setf( ios::showpos );
+ double le = clipdouble( piet.logZ / logZ0 - 1.0, 1e-9 );
+ cout << le << "\t";
+ cout.unsetf( ios::showpos );
+ } else
+ cout << "N/A \t";
+
+ if( piet.has_maxdiff ) {
+ double md = clipdouble( piet.maxdiff, 1e-9 );
+ if( isnan( me ) )
+ md = me;
+ if( isnan( ae ) )
+ md = ae;
+ cout << md << "\t";
+ } else
+ cout << "N/A \t";
+ }
+ cout << endl;
- if( marginals ) {
- for( size_t i = 0; i < piet.q.size(); i++ )
- cout << "# " << piet.q[i] << endl;
+ if( marginals ) {
+ for( size_t i = 0; i < piet.q.size(); i++ )
+ cout << "# " << piet.q[i] << endl;
+ }
}
- }
- return 0;
+ return 0;
+ } catch( string &s ) {
+ cerr << "Exception: " << s << endl;
+ return 2;
+ }
}
--- /dev/null
+1
+
+1
+ConditionalProbEstimation [target_dim=2,total_dim=4,pseudo_count=1]
+1
+0 1 0
--- /dev/null
+1
+
+2
+0 1
+2 2
+4
+0 0.5
+1 0.5
+2 0.5
+3 0.5
\ No newline at end of file
--- /dev/null
+0 1
+0 1
+0 1
+0 1
+0 1
+0 1
+0 1
+0 1
+0 1
+0 1
+0 0
+1 1
+1 1
+1 1
+1 0
+1 0
+1 0
+1 0
+1 0
+1 0
+1 0
--- /dev/null
+1
+
+1
+ConditionalProbEstimation [target_dim=2,total_dim=8,pseudo_count=1]
+1
+0 1 0 2
--- /dev/null
+1
+
+3
+0 1 2
+2 2 2
+8
+0 0.5
+1 0.5
+2 0.5
+3 0.5
+4 0.5
+5 0.5
+6 0.5
+7 0.5
\ No newline at end of file
--- /dev/null
+0 1 2 4 6 7
+0 0 1 1 0
+1 1 1 0
+0 1 0 0 0 1
+0 0 0 1 0 0
+1 0 0 1 0
--- /dev/null
+1
+
+1
+ConditionalProbEstimation [target_dim=2,total_dim=8,pseudo_count=1]
+1
+2 1 2 4
--- /dev/null
+1
+
+1
+ConditionalProbEstimation [target_dim=2,total_dim=8,pseudo_count=1]
+3
+2 1 2 4
+1 0 1 6
+0 6 2 7
--- /dev/null
+1
+
+1
+ConditionalProbEstimation [target_dim=2,total_dim=8,pseudo_count=1]
+2
+2 1 2 4
+0 6 2 7
--- /dev/null
+#!/bin/bash
+TMPFILE1=`mktemp /var/tmp/testem.XXXXXX`
+trap 'rm -f $TMPFILE1' 0 1 15
+
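+# Run each EM test case, collect the combined output in a temporary file and
+# compare it against the expected output in testem.out; any difference makes
+# the script exit with a nonzero status.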
+./testem 2var.fg 2var_data.tab 2var.em > $TMPFILE1
+./testem 3var.fg 2var_data.tab 3var.em >> $TMPFILE1
+./testem ../hoi1.fg hoi1_data.tab hoi1_share_f0_f2.em >> $TMPFILE1
+./testem ../hoi1.fg hoi1_data.tab hoi1_share_f0_f1_f2.em >> $TMPFILE1
+diff -s $TMPFILE1 testem.out || exit 1
+
+rm -f $TMPFILE1
--- /dev/null
+testem 2var.fg 2var_data.tab 2var.em > testem.out.tmp
+testem 3var.fg 2var_data.tab 3var.em >> testem.out.tmp
+testem ..\hoi1.fg hoi1_data.tab hoi1_share_f0_f2.em >> testem.out.tmp
+testem ..\hoi1.fg hoi1_data.tab hoi1_share_f0_f1_f2.em >> testem.out.tmp
+diff -s testem.out.tmp testem.out
+
+del testem.out.tmp
--- /dev/null
+/* Copyright (C) 2009 Charles Vaske [cvaske at soe dot ucsc dot edu]
+ University of California Santa Cruz
+
+ This file is part of libDAI.
+
+ libDAI is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ libDAI is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with libDAI; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+
+#include <iostream>
+#include <fstream>
+#include <string>
+
+#include <dai/factorgraph.h>
+#include <dai/evidence.h>
+#include <dai/alldai.h>
+
+
+using namespace std;
+using namespace dai;
+
+
+void usage( const string &msg ) {
+ cerr << msg << endl;
+ cerr << "Usage:" << endl;
+ cerr << " testem factorgraph.fg evidence.tab emconfig.em" << endl;
+ exit( 1 );
+}
+
+
+int main( int argc, char** argv ) {
+ if( argc != 4 )
+ usage("Incorrect number of arguments.");
+
+ FactorGraph fg;
+ fg.ReadFromFile( argv[1] );
+
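+ // Set up an exact junction tree (JTREE) inference algorithm with HUGIN
+ // updates; EMAlg uses it for the inference required in each E-step.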
+ PropertySet infprops;
+ infprops.Set( "verbose", (size_t)1 );
+ infprops.Set( "updates", string("HUGIN") );
+ InfAlg* inf = newInfAlg( "JTREE", fg, infprops );
+ inf->init();
+
+ Evidence e;
+ ifstream estream( argv[2] );
+ e.addEvidenceTabFile( estream, fg );
+
+ cout << "Number of samples: " << e.nrSamples() << endl;
+ for( Evidence::iterator ps = e.begin(); ps != e.end(); ps++ )
+ cout << "Sample #" << (ps - e.begin()) << " has " << ps->observations().size() << " observations." << endl;
+
+ ifstream emstream( argv[3] );
+ EMAlg em(e, *inf, emstream);
+
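+ // Run EM until its termination conditions are satisfied, printing the
+ // log-likelihood after each iteration.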
+ while( !em.hasSatisfiedTermConditions() ) {
+ Real l = em.iterate();
+ cout << "Iteration " << em.getCurrentIters() << " likelihood: " << l <<endl;
+ }
+
+ cout << endl << "Inferred Factor Graph:" << endl << "######################" << endl;
+ cout.precision(12);
+ cout << inf->fg();
+
+ delete inf;
+
+ return 0;
+}
--- /dev/null
+Number of samples: 20
+Sample #0 has 2 observations.
+Sample #1 has 2 observations.
+Sample #2 has 2 observations.
+Sample #3 has 2 observations.
+Sample #4 has 2 observations.
+Sample #5 has 2 observations.
+Sample #6 has 2 observations.
+Sample #7 has 2 observations.
+Sample #8 has 2 observations.
+Sample #9 has 2 observations.
+Sample #10 has 2 observations.
+Sample #11 has 2 observations.
+Sample #12 has 2 observations.
+Sample #13 has 2 observations.
+Sample #14 has 2 observations.
+Sample #15 has 2 observations.
+Sample #16 has 2 observations.
+Sample #17 has 2 observations.
+Sample #18 has 2 observations.
+Sample #19 has 2 observations.
+Iteration 1 likelihood: -13.8629
+Iteration 2 likelihood: -9.56675
+Iteration 3 likelihood: -9.56675
+
+Inferred Factor Graph:
+######################
+1
+
+2
+0 1
+2 2
+4
+0 0.166666666667
+1 0.666666666667
+2 0.833333333333
+3 0.333333333333
+Number of samples: 20
+Sample #0 has 2 observations.
+Sample #1 has 2 observations.
+Sample #2 has 2 observations.
+Sample #3 has 2 observations.
+Sample #4 has 2 observations.
+Sample #5 has 2 observations.
+Sample #6 has 2 observations.
+Sample #7 has 2 observations.
+Sample #8 has 2 observations.
+Sample #9 has 2 observations.
+Sample #10 has 2 observations.
+Sample #11 has 2 observations.
+Sample #12 has 2 observations.
+Sample #13 has 2 observations.
+Sample #14 has 2 observations.
+Sample #15 has 2 observations.
+Sample #16 has 2 observations.
+Sample #17 has 2 observations.
+Sample #18 has 2 observations.
+Sample #19 has 2 observations.
+Iteration 1 likelihood: 0
+Iteration 2 likelihood: 3.97035
+Iteration 3 likelihood: 3.97035
+
+Inferred Factor Graph:
+######################
+1
+
+3
+0 1 2
+2 2 2
+8
+0 0.214285714286
+1 0.642857142857
+2 0.785714285714
+3 0.357142857143
+4 0.214285714286
+5 0.642857142857
+6 0.785714285714
+7 0.357142857143
+Number of samples: 5
+Sample #0 has 5 observations.
+Sample #1 has 4 observations.
+Sample #2 has 6 observations.
+Sample #3 has 6 observations.
+Sample #4 has 5 observations.
+Iteration 1 likelihood: 11.1646
+Iteration 2 likelihood: 1.53723
+Iteration 3 likelihood: 1.64691
+Iteration 4 likelihood: 1.67497
+Iteration 5 likelihood: 1.68191
+
+Inferred Factor Graph:
+######################
+3
+
+3
+2 6 7
+2 2 2
+8
+0 0.398343360806
+1 0.351464146565
+2 0.601656639194
+3 0.648535853435
+4 0.804844687957
+5 0.67374245803
+6 0.195155312043
+7 0.32625754197
+
+3
+0 1 6
+2 2 2
+8
+0 1.03521336269
+1 1.54747895212
+2 2.31765218974
+3 1.28041900719
+4 4.922079813
+5 2.52725575019
+6 0.831279296316
+7 0.262805630803
+
+3
+1 2 4
+2 2 2
+8
+0 0.398343360806
+1 0.601656639194
+2 0.351464146565
+3 0.648535853435
+4 0.804844687957
+5 0.195155312043
+6 0.67374245803
+7 0.32625754197
+Number of samples: 5
+Sample #0 has 5 observations.
+Sample #1 has 4 observations.
+Sample #2 has 6 observations.
+Sample #3 has 6 observations.
+Sample #4 has 5 observations.
+Iteration 1 likelihood: 11.1646
+Iteration 2 likelihood: -7.29331
+Iteration 3 likelihood: -7.261
+
+Inferred Factor Graph:
+######################
+3
+
+3
+2 6 7
+2 2 2
+8
+0 0.495312199726
+1 0.499107942908
+2 0.504687800274
+3 0.500892057092
+4 0.640416549958
+5 0.495122293924
+6 0.359583450042
+7 0.504877706076
+
+3
+0 1 6
+2 2 2
+8
+0 0.495312199726
+1 0.504687800274
+2 0.499107942908
+3 0.500892057092
+4 0.640416549958
+5 0.359583450042
+6 0.495122293924
+7 0.504877706076
+
+3
+1 2 4
+2 2 2
+8
+0 0.495312199726
+1 0.504687800274
+2 0.499107942908
+3 0.500892057092
+4 0.640416549958
+5 0.359583450042
+6 0.495122293924
+7 0.504877706076
trap 'rm -f $TMPFILE1' 0 1 15
./testall testfast.fg > $TMPFILE1
-diff -s $TMPFILE1 testfast.out
+diff -s $TMPFILE1 testfast.out || exit 1
rm -f $TMPFILE1