tests/testdai$(EE) : tests/testdai.cpp $(HEADERS) $(LIB)/libdai$(LE)
$(CC) $(CCO)tests/testdai$(EE) tests/testdai.cpp $(LIBS) $(BOOSTLIBS)
-tests/testem/testem$(EE): tests/testem/testem.cpp $(HEADERS) $(LIB)/libdai$(LE)
+tests/testem/testem$(EE) : tests/testem/testem.cpp $(HEADERS) $(LIB)/libdai$(LE)
$(CC) $(CCO)$@ $< $(LIBS) $(BOOSTLIBS)
# MATLAB INTERFACE
cd tests && testregression.bat && cd ..
endif
-testem: tests/testem/testem$(EE)
+testem : tests/testem/testem$(EE)
@echo Starting EM tests
cd tests/testem && ./runtests && cd ../..
+
# DOCUMENTATION
################
-/*
- Copyright 2009 Charles Vaske <cvaske@soe.ucsc.edu>
- University of California Santa Cruz
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
+/* Copyright (C) 2009 Charles Vaske [cvaske at soe dot ucsc dot edu]
+ University of California Santa Cruz
+
+ This file is part of libDAI.
+
+ libDAI is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ libDAI is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with libDAI; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
+
#ifndef __defined_libdai_emalg_h
#define __defined_libdai_emalg_h
-#include<vector>
-#include<map>
+
+#include <vector>
+#include <map>
#include <dai/factor.h>
#include <dai/daialg.h>
* EMAlg, ParameterEstimate, and FactorOrientations
*/
+
namespace dai {
-///Interface for a parameter estimation method.
+
+/// Interface for a parameter estimation method.
/** This parameter estimation interface is based on sufficient statistics.
* Implementations are responsible for collecting data from a probability
* vector passed to it from a SharedParameters container object.
* via the static ParameterEstimation::registerMethod function.
*/
class ParameterEstimation {
-public:
- /// A pointer to a factory function.
- typedef ParameterEstimation* (*ParamEstFactory)(const PropertySet&);
-
- /// General factory method for construction of ParameterEstimation subclasses.
- static ParameterEstimation* construct(const std::string& method,
- const PropertySet& p);
- /// Register a subclass with ParameterEstimation::construct.
- static void registerMethod(const std::string method,
- const ParamEstFactory f) {
- if (_registry == NULL) {
- loadDefaultRegistry();
- }
- (*_registry)[method] = f;
- }
- /// Virtual destructor for deleting pointers to derived classes.
- virtual ~ParameterEstimation() {}
- /// Estimate the factor using the accumulated sufficient statistics and reset.
- virtual Prob estimate() = 0;
- /// Accumulate the sufficient statistics for p.
- virtual void addSufficientStatistics(Prob& p) = 0;
- /// Returns the size of the Prob that is passed to addSufficientStatistics.
- virtual size_t probSize() const = 0;
- /// A virtual copy constructor.
- virtual ParameterEstimation* clone() const= 0;
-private:
- static std::map< std::string, ParamEstFactory >* _registry;
- static void loadDefaultRegistry();
+ public:
+ /// A pointer to a factory function.
+ typedef ParameterEstimation* (*ParamEstFactory)(const PropertySet&);
+
+ /// General factory method for construction of ParameterEstimation subclasses.
+ static ParameterEstimation* construct( const std::string& method, const PropertySet& p );
+ /// Register a subclass with ParameterEstimation::construct.
+ static void registerMethod( const std::string method, const ParamEstFactory f ) {
+ if( _registry == NULL )
+ loadDefaultRegistry();
+ (*_registry)[method] = f;
+ }
+ /// Virtual destructor for deleting pointers to derived classes.
+ virtual ~ParameterEstimation() {}
+ /// Estimate the factor using the accumulated sufficient statistics and reset.
+ virtual Prob estimate() = 0;
+ /// Accumulate the sufficient statistics for p.
+ virtual void addSufficientStatistics( Prob& p ) = 0;
+ /// Returns the size of the Prob that is passed to addSufficientStatistics.
+ virtual size_t probSize() const = 0;
+ /// A virtual copy constructor.
+ virtual ParameterEstimation* clone() const = 0;
+
+ private:
+ static std::map<std::string, ParamEstFactory>* _registry;
+ static void loadDefaultRegistry();
};
+
/// Estimates the parameters of a conditional probability, using pseudocounts.
class CondProbEstimation : private ParameterEstimation {
-private:
- size_t _target_dim;
- Prob _stats;
- Prob _initial_stats;
-public:
- /** For a conditional probability \f$ Pr( X | Y ) \f$,
- * \param target_dimension should equal \f$ | X | \f$
- * \param pseudocounts has length \f$ |X| \cdot |Y| \f$
- */
- CondProbEstimation(size_t target_dimension, Prob pseudocounts);
-
- /// Virtual constructor, using a PropertySet.
- /** Some keys in the PropertySet are required:
- * - target_dimension, which should be equal to \f$ | X | \f$
- * - total_dimension, which sholud be equal to \f$ |X| \cdot |Y| \f$
- *
- * An optional key is:
- * - pseudo_count which specifies the initial counts (defaults to 1)
- */
- static ParameterEstimation* factory(const PropertySet& p);
- /// Virtual destructor
- virtual ~CondProbEstimation() {}
- /// Returns an estimate of the conditional probability distribution.
- /** The format of the resulting Prob keeps all the values for
- * \f$ P(X | Y=a) \f$ sequential in teh array.
- */
- virtual Prob estimate();
- /// Accumulate sufficient statistics from the expectations in p.
- virtual void addSufficientStatistics(Prob& p);
- /// Returns the required size for arguments to addSufficientStatistics
- virtual size_t probSize() const { return _stats.size(); }
- /// Virtual copy constructor.
- virtual ParameterEstimation* clone() const {
- return new CondProbEstimation(_target_dim, _initial_stats);
- }
+ private:
+ size_t _target_dim;
+ Prob _stats;
+ Prob _initial_stats;
+ public:
+ /** For a conditional probability \f$ Pr( X | Y ) \f$,
+ * \param target_dimension should equal \f$ | X | \f$
+ * \param pseudocounts has length \f$ |X| \cdot |Y| \f$
+ */
+ CondProbEstimation( size_t target_dimension, Prob pseudocounts );
+
+ /// Virtual constructor, using a PropertySet.
+ /** Some keys in the PropertySet are required:
+ * - target_dimension, which should be equal to \f$ | X | \f$
+ * - total_dimension, which sholud be equal to \f$ |X| \cdot |Y| \f$
+ *
+ * An optional key is:
+ * - pseudo_count which specifies the initial counts (defaults to 1)
+ */
+ static ParameterEstimation* factory( const PropertySet& p );
+ /// Virtual destructor
+ virtual ~CondProbEstimation() {}
+ /// Returns an estimate of the conditional probability distribution.
+ /** The format of the resulting Prob keeps all the values for
+ * \f$ P(X | Y=a) \f$ sequential in teh array.
+ */
+ virtual Prob estimate();
+ /// Accumulate sufficient statistics from the expectations in p.
+ virtual void addSufficientStatistics( Prob& p );
+ /// Returns the required size for arguments to addSufficientStatistics
+ virtual size_t probSize() const {
+ return _stats.size();
+ }
+ /// Virtual copy constructor.
+ virtual ParameterEstimation* clone() const {
+ return new CondProbEstimation( _target_dim, _initial_stats );
+ }
};
+
/** A single factor or set of factors whose parameters should be
* estimated. Each factor's values are reordered to match a
* canonical variable ordering. This canonical variable ordering
* parameters can be shared between different factors during EM.
*/
class SharedParameters {
-public:
- /// Convenience label for an index into a FactorGraph to a factor.
- typedef size_t FactorIndex;
- /// Convenience label for a grouping of factor orientations.
- typedef std::map< FactorIndex, std::vector< Var > > FactorOrientations;
-private:
- std::map< FactorIndex, VarSet > _varsets;
- std::map< FactorIndex, Permute > _perms;
- FactorOrientations _varorders;
- ParameterEstimation* _estimation;
- bool _deleteEstimation;
-
- static Permute calculatePermutation(const std::vector< Var >& varorder,
- const std::vector< size_t >& dims,
- VarSet& outVS);
- void setPermsAndVarSetsFromVarOrders();
-public:
- /// Copy constructor
- SharedParameters(const SharedParameters& sp);
- /// Constructor useful in programmatic settings
- /** \param varorders all the factor orientations for this parameter
- \param estimation a pointer to the parameter estimation method
- */
- SharedParameters(const FactorOrientations& varorders,
- ParameterEstimation* estimation);
-
- /// Constructor for making an object from a stream
- SharedParameters(std::istream& is, const FactorGraph& fg_varlookup);
-
- /// Destructor
- ~SharedParameters() { if (_deleteEstimation) delete _estimation; }
-
- /// Collect the necessary statistics from expected values
- void collectSufficientStatistics(InfAlg& alg);
-
- /// Estimate and set the shared parameters
- void setParameters(FactorGraph& fg);
+ public:
+ /// Convenience label for an index into a FactorGraph to a factor.
+ typedef size_t FactorIndex;
+ /// Convenience label for a grouping of factor orientations.
+ typedef std::map< FactorIndex, std::vector< Var > > FactorOrientations;
+ private:
+ std::map<FactorIndex, VarSet> _varsets;
+ std::map<FactorIndex, Permute> _perms;
+ FactorOrientations _varorders;
+ ParameterEstimation* _estimation;
+ bool _deleteEstimation;
+
+ static Permute calculatePermutation( const std::vector<Var>& varorder, const std::vector<size_t>& dims, VarSet& outVS );
+ void setPermsAndVarSetsFromVarOrders();
+
+ public:
+ /// Copy constructor
+ SharedParameters( const SharedParameters& sp );
+ /// Constructor useful in programmatic settings
+ /** \param varorders all the factor orientations for this parameter
+ * \param estimation a pointer to the parameter estimation method
+ */
+ SharedParameters( const FactorOrientations& varorders, ParameterEstimation* estimation );
+
+ /// Constructor for making an object from a stream
+ SharedParameters( std::istream& is, const FactorGraph& fg_varlookup );
+
+ /// Destructor
+ ~SharedParameters() {
+ if( _deleteEstimation )
+ delete _estimation;
+ }
+
+ /// Collect the necessary statistics from expected values
+ void collectSufficientStatistics( InfAlg& alg );
+
+ /// Estimate and set the shared parameters
+ void setParameters( FactorGraph& fg );
};
+
/** A maximization step groups together several parameter estimation
- * tasks into a single unit.
+ * tasks into a single unit.
*/
class MaximizationStep {
-private:
- std::vector< SharedParameters > _params;
-public:
- MaximizationStep() : _params() {}
-
- /// Construct an step object taht contains all these estimation probelms
- MaximizationStep(std::vector< SharedParameters >& maximizations) :
- _params(maximizations) {}
-
- /// Construct a step from an input stream
- MaximizationStep(std::istream& is, const FactorGraph& fg_varlookup);
-
- /** Collect the beliefs from this InfAlg as expectations for
- * the next Maximization step.
- */
- void addExpectations(InfAlg& alg);
-
- /** Using all of the currently added expectations, make new factors
- * with maximized parameters and set them in the FactorGraph.
- */
- void maximize(FactorGraph& fg);
+ private:
+ std::vector<SharedParameters> _params;
+ public:
+ MaximizationStep() : _params() {}
+
+ /// Construct an step object taht contains all these estimation probelms
+ MaximizationStep( std::vector<SharedParameters>& maximizations ) : _params(maximizations) {}
+
+ /// Construct a step from an input stream
+ MaximizationStep( std::istream& is, const FactorGraph& fg_varlookup );
+
+ /** Collect the beliefs from this InfAlg as expectations for
+ * the next Maximization step.
+ */
+ void addExpectations( InfAlg& alg );
+
+ /** Using all of the currently added expectations, make new factors
+ * with maximized parameters and set them in the FactorGraph.
+ */
+ void maximize( FactorGraph& fg );
};
+
/// EMAlg performs Expectation Maximization to learn factor parameters.
/** This requires specifying:
* - Evidence (instances of observations from the graphical model),
* MaximizationSteps.
*/
class EMAlg {
-private:
- /// All the data samples used during learning
- const Evidence& _evidence;
+ private:
+ /// All the data samples used during learning
+ const Evidence& _evidence;
+
+ /// How to do the expectation step
+ InfAlg& _estep;
+
+ /// The maximization steps to take
+ std::vector<MaximizationStep> _msteps;
+ size_t _iters;
+ std::vector<Real> _lastLogZ;
+
+ size_t _max_iters;
+ Real _log_z_tol;
+
+ public:
+ /// Key for setting maximum iterations @see setTermConditions
+ static const std::string MAX_ITERS_KEY; //("max_iters");
+ /// Default maximum iterations
+ static const size_t MAX_ITERS_DEFAULT;
+ /// Key for setting likelihood termination condition @see setTermConditions
+ static const std::string LOG_Z_TOL_KEY;
+ /// Default log_z_tol
+ static const Real LOG_Z_TOL_DEFAULT;
+
+ /// Construct an EMAlg from all these objects
+ EMAlg( const Evidence& evidence, InfAlg& estep, std::vector<MaximizationStep>& msteps, PropertySet* termconditions = NULL) :
+ _evidence(evidence), _estep(estep), _msteps(msteps), _iters(0), _lastLogZ(), _max_iters(MAX_ITERS_DEFAULT), _log_z_tol(LOG_Z_TOL_DEFAULT) {
+ setTermConditions( termconditions );
+ }
- /// How to do the expectation step
- InfAlg& _estep;
+ /// Construct an EMAlg from an input stream
+ EMAlg( const Evidence& evidence, InfAlg& estep, std::istream& mstep_file );
+
+ /// Change the coditions for termination
+ /** There are two possible parameters in the PropertySety
+ * - max_iters maximum number of iterations (default 30)
+ * - log_z_tol proportion of increase in logZ (default 0.01)
+ *
+ * \see hasSatisifiedTermConditions()
+ */
+ void setTermConditions( const PropertySet* p );
+
+ /// Determine if the termination conditions have been met.
+ /** There are two sufficient termination conditions:
+ * -# the maximum number of iterations has been performed
+ * -# the ratio of logZ increase over previous logZ is less than the
+ * tolerance. I.e.
+ \f$ \frac{\log(Z_{current}) - \log(Z_{previous})}
+ {| \log(Z_{previous}) | } < tol \f$.
+ */
+ bool hasSatisfiedTermConditions() const;
+
+ size_t getCurrentIters() const {
+ return _iters;
+ }
+
+ /// Perform an iteration over all maximization steps
+ Real iterate();
+
+ /// Performs an iteration over a single MaximizationStep
+ Real iterate( MaximizationStep& mstep );
+
+ /// Iterate until termination conditions satisfied
+ void run();
+};
- /// The maximization steps to take
- std::vector<MaximizationStep> _msteps;
- size_t _iters;
- std::vector<Real> _lastLogZ;
-
- size_t _max_iters;
- Real _log_z_tol;
-
-public:
- /// Key for setting maximum iterations @see setTermConditions
- static const std::string MAX_ITERS_KEY;//("max_iters");
- /// Default maximum iterations
- static const size_t MAX_ITERS_DEFAULT;
- /// Key for setting likelihood termination condition @see setTermConditions
- static const std::string LOG_Z_TOL_KEY;
- /// Default log_z_tol
- static const Real LOG_Z_TOL_DEFAULT;
-
- /// Construct an EMAlg from all these objects
- EMAlg(const Evidence& evidence, InfAlg& estep,
- std::vector<MaximizationStep>& msteps,
- PropertySet* termconditions=NULL)
- : _evidence(evidence),
- _estep(estep),
- _msteps(msteps),
- _iters(0),
- _lastLogZ(),
- _max_iters(MAX_ITERS_DEFAULT),
- _log_z_tol(LOG_Z_TOL_DEFAULT) {
- setTermConditions(termconditions);
- }
-
- /// Construct an EMAlg from an input stream
- EMAlg(const Evidence& evidence, InfAlg& estep, std::istream& mstep_file);
-
- /// Change the coditions for termination
- /** There are two possible parameters in the PropertySety
- * - max_iters maximum number of iterations (default 30)
- * - log_z_tol proportion of increase in logZ (default 0.01)
- *
- * \see hasSatisifiedTermConditions()
- */
- void setTermConditions(const PropertySet* p);
-
- /// Determine if the termination conditions have been met.
- /** There are two sufficient termination conditions:
- * -# the maximum number of iterations has been performed
- * -# the ratio of logZ increase over previous logZ is less than the
- * tolerance. I.e.
- \f$ \frac{\log(Z_{current}) - \log(Z_{previous})}
- {| \log(Z_{previous}) | } < tol \f$.
- */
- bool hasSatisfiedTermConditions() const;
-
- size_t getCurrentIters() const { return _iters; }
-
- /// Perform an iteration over all maximization steps
- Real iterate();
-
- /// Performs an iteration over a single MaximizationStep
- Real iterate(MaximizationStep& mstep);
-
- /// Iterate until termination conditions satisfied
- void run();
-};
+} // end of namespace dai
-} // namespace dai
#endif
-/*
- Copyright 2009 Charles Vaske <cvaske@soe.ucsc.edu>
- University of California Santa Cruz
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
+/* Copyright (C) 2009 Charles Vaske [cvaske at soe dot ucsc dot edu]
+ University of California Santa Cruz
+
+ This file is part of libDAI.
+
+ libDAI is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ libDAI is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with libDAI; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
+
+/// \file
+/// \brief Defines classes Evidence and SampleData
+/// \todo Improve documentation
+
+
#ifndef __defined_libdai_evidence_h
#define __defined_libdai_evidence_h
-#include <istream>
+#include <istream>
#include <dai/daialg.h>
+
namespace dai {
-/// Store joint observations on a graphical model.
+
+/// Stores a set of observations on a graphical model.
class SampleData {
-private:
- std::string _name;
- std::map<Var, size_t> _obs;
-public:
- /// Construct an empty object
- SampleData() : _name(), _obs() {}
- /// Set the name of the sample
- void name(const std::string& name) { _name = name; }
- /// Get the name of the sample
- const std::string& name() const { return _name; }
- /// Read from the observation map
- const std::map<Var, size_t>& observations() const { return _obs; }
- /// Add an observation
- void addObservation(Var node, size_t setting);
- /// Add evidence by clamping variables to observed values.
- void applyEvidence(InfAlg& alg) const;
+ private:
+ std::string _name;
+ std::map<Var, size_t> _obs;
+ public:
+ /// Construct an empty object
+ SampleData() : _name(), _obs() {}
+ /// Set the name of the sample
+ void name(const std::string& name) { _name = name; }
+ /// Get the name of the sample
+ const std::string& name() const { return _name; }
+ /// Read from the observation map
+ const std::map<Var, size_t>& observations() const { return _obs; }
+ /// Add an observation
+ void addObservation(Var node, size_t setting);
+ /// Add evidence by clamping variables to observed values.
+ void applyEvidence(InfAlg& alg) const;
};
+
/// Store observations from a graphical model.
class Evidence {
-private:
- std::map< std::string, SampleData > _samples;
+ private:
+ std::map<std::string, SampleData> _samples;
public:
- /// Start with empty obects, then fill with calls to addEvidenceTabFile()
- Evidence() : _samples() {}
+ /// Start with empty obects, then fill with calls to addEvidenceTabFile()
+ Evidence() : _samples() {}
- /** Read in tab-data from a stream. Each line contains one sample, and
- * the first line is a header line with names. The first column contains
- * names for each of the samples.
- */
- void addEvidenceTabFile(std::istream& is,
- std::map< std::string, Var >& varMap);
-
- /** Read in tab-data from a stream. Each line contains one sample,
- * and the first line is a header line with variable IDs. The first
- * column contains names for each of the samples.
- */
- void addEvidenceTabFile(std::istream& is, FactorGraph& fg);
+ /** Read in tab-data from a stream. Each line contains one sample, and
+ * the first line is a header line with names. The first column contains
+ * names for each of the samples.
+ */
+ void addEvidenceTabFile(std::istream& is, std::map< std::string, Var >& varMap);
+
+ /** Read in tab-data from a stream. Each line contains one sample,
+ * and the first line is a header line with variable IDs. The first
+ * column contains names for each of the samples.
+ */
+ void addEvidenceTabFile(std::istream& is, FactorGraph& fg);
- /// Total number of samples in this evidence file
- size_t nrSamples() const { return _samples.size(); }
-
- /// @name iterator interface
- //@{
- typedef std::map< std::string, SampleData >::iterator iterator;
- typedef std::map< std::string, SampleData >::const_iterator const_iterator;
- iterator begin() { return _samples.begin(); }
- const_iterator begin() const { return _samples.begin(); }
- iterator end() { return _samples.end(); }
- const_iterator end() const { return _samples.end(); }
- //@}
+ /// Total number of samples in this evidence file
+ size_t nrSamples() const { return _samples.size(); }
+ /// @name iterator interface
+ //@{
+ typedef std::map< std::string, SampleData >::iterator iterator;
+ typedef std::map< std::string, SampleData >::const_iterator const_iterator;
+ iterator begin() { return _samples.begin(); }
+ const_iterator begin() const { return _samples.begin(); }
+ iterator end() { return _samples.end(); }
+ const_iterator end() const { return _samples.end(); }
+ //@}
};
-
-}
+
+
+} // end of namespace dai
+
#endif
IMPOSSIBLE_TYPECAST,
INTERNAL_ERROR,
NOT_NORMALIZABLE,
- INVALID_EVIDENCE_FILE,
- INVALID_EVIDENCE_LINE,
- INVALID_EVIDENCE_OBSERVATION,
- INVALID_SHARED_PARAMETERS_ORDER,
- INVALID_SHARED_PARAMETERS_INPUT_LINE,
- UNKNOWN_PARAMETER_ESTIMATION_METHOD,
+ INVALID_EVIDENCE_FILE,
+ INVALID_EVIDENCE_LINE,
+ INVALID_EVIDENCE_OBSERVATION,
+ INVALID_SHARED_PARAMETERS_ORDER,
+ INVALID_SHARED_PARAMETERS_INPUT_LINE,
+ UNKNOWN_PARAMETER_ESTIMATION_METHOD,
NUM_ERRORS}; // NUM_ERRORS should be the last entry
/// Constructor
size_t maxSize() { return _maxsize; }
};
+
/// Split a string into tokens
-void tokenizeString(const std::string& s,
+void tokenizeString( const std::string& s,
std::vector<std::string>& outTokens,
- const std::string& delim="\t\n");
+ const std::string& delim="\t\n" );
+
} // end of namespace dai
-#include <dai/util.h>
+/* Copyright (C) 2009 Charles Vaske [cvaske at soe dot ucsc dot edu]
+ University of California Santa Cruz
+
+ This file is part of libDAI.
+
+ libDAI is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ libDAI is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+ You should have received a copy of the GNU General Public License
+ along with libDAI; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+
+#include <dai/util.h>
#include <dai/emalg.h>
-namespace dai{
-std::map< std::string, ParameterEstimation::ParamEstFactory>*
-ParameterEstimation::_registry = NULL;
+namespace dai {
+
+
+std::map<std::string, ParameterEstimation::ParamEstFactory> *ParameterEstimation::_registry = NULL;
+
void ParameterEstimation::loadDefaultRegistry() {
- _registry = new std::map< std::string, ParamEstFactory>();
- (*_registry)["ConditionalProbEstimation"] = CondProbEstimation::factory;
+ _registry = new std::map< std::string, ParamEstFactory>();
+ (*_registry)["ConditionalProbEstimation"] = CondProbEstimation::factory;
}
-ParameterEstimation* ParameterEstimation::construct(const std::string& method,
- const PropertySet& p) {
- if (_registry == NULL) {
- loadDefaultRegistry();
- }
- std::map< std::string, ParamEstFactory>::iterator i = _registry->find(method);
- if (i == _registry->end()) {
- DAI_THROW(UNKNOWN_PARAMETER_ESTIMATION_METHOD);
- }
- ParamEstFactory factory = i->second;
- return factory(p);
+
+ParameterEstimation* ParameterEstimation::construct( const std::string& method, const PropertySet& p ) {
+ if (_registry == NULL)
+ loadDefaultRegistry();
+ std::map< std::string, ParamEstFactory>::iterator i = _registry->find(method);
+ if (i == _registry->end())
+ DAI_THROW(UNKNOWN_PARAMETER_ESTIMATION_METHOD);
+ ParamEstFactory factory = i->second;
+ return factory(p);
}
-ParameterEstimation* CondProbEstimation::factory(const PropertySet& p) {
- size_t target_dimension = p.getStringAs<size_t>("target_dim");
- size_t total_dimension = p.getStringAs<size_t>("total_dim");
- Real pseudo_count = 1;
- if (p.hasKey("pseudo_count")) {
- pseudo_count = p.getStringAs<Real>("pseudo_count");
- }
- Prob counts_vec(total_dimension, pseudo_count);
- return new CondProbEstimation(target_dimension, counts_vec);
+
+ParameterEstimation* CondProbEstimation::factory( const PropertySet& p ) {
+ size_t target_dimension = p.getStringAs<size_t>("target_dim");
+ size_t total_dimension = p.getStringAs<size_t>("total_dim");
+ Real pseudo_count = 1;
+ if (p.hasKey("pseudo_count"))
+ pseudo_count = p.getStringAs<Real>("pseudo_count");
+ Prob counts_vec(total_dimension, pseudo_count);
+ return new CondProbEstimation(target_dimension, counts_vec);
}
-CondProbEstimation::CondProbEstimation(size_t target_dimension,
- Prob pseudocounts)
- : _target_dim(target_dimension),
- _stats(pseudocounts),
- _initial_stats(pseudocounts) {
- if (_stats.size() % _target_dim) {
- DAI_THROW(MALFORMED_PROPERTY);
- }
+
+CondProbEstimation::CondProbEstimation( size_t target_dimension, Prob pseudocounts )
+ : _target_dim(target_dimension), _stats(pseudocounts), _initial_stats(pseudocounts)
+{
+ if (_stats.size() % _target_dim)
+ DAI_THROW(MALFORMED_PROPERTY);
}
-void CondProbEstimation::addSufficientStatistics(Prob& p) {
- _stats += p;
+
+void CondProbEstimation::addSufficientStatistics( Prob& p ) {
+ _stats += p;
}
+
Prob CondProbEstimation::estimate() {
- for (size_t parent = 0; parent < _stats.size(); parent += _target_dim) {
- Real norm = 0;
- size_t top = parent + _target_dim;
- for (size_t i = parent; i < top; ++i) {
- norm += _stats[i];
- }
- if (norm != 0) {
- norm = 1 / norm;
- }
- for (size_t i = parent; i < top; ++i) {
- _stats[i] *= norm;
+ for (size_t parent = 0; parent < _stats.size(); parent += _target_dim) {
+ Real norm = 0;
+ size_t top = parent + _target_dim;
+ for (size_t i = parent; i < top; ++i)
+ norm += _stats[i];
+ if (norm != 0)
+ norm = 1 / norm;
+ for (size_t i = parent; i < top; ++i)
+ _stats[i] *= norm;
}
- }
- Prob result = _stats;
- _stats = _initial_stats;
- return result;
+ Prob result = _stats;
+ _stats = _initial_stats;
+ return result;
}
-Permute
-SharedParameters::calculatePermutation(const std::vector< Var >& varorder,
- const std::vector< size_t >& dims,
- VarSet& outVS) {
- std::vector<long> labels(dims.size());
+
+Permute SharedParameters::calculatePermutation(const std::vector< Var >& varorder, const std::vector< size_t >& dims, VarSet& outVS) {
+ std::vector<long> labels(dims.size());
- // Check that the variable set is compatible
- if (varorder.size() != dims.size()) {
- DAI_THROW(INVALID_SHARED_PARAMETERS_ORDER);
- }
+ // Check that the variable set is compatible
+ if (varorder.size() != dims.size())
+ DAI_THROW(INVALID_SHARED_PARAMETERS_ORDER);
- // Collect all labels, and order them in vs
- for (size_t di = 0; di < dims.size(); ++di) {
- if (dims[di] != varorder[di].states()) {
- DAI_THROW(INVALID_SHARED_PARAMETERS_ORDER);
+ // Collect all labels, and order them in vs
+ for (size_t di = 0; di < dims.size(); ++di) {
+ if (dims[di] != varorder[di].states())
+ DAI_THROW(INVALID_SHARED_PARAMETERS_ORDER);
+ outVS |= varorder[di];
+ labels[di] = varorder[di].label();
}
- outVS |= varorder[di];
- labels[di] = varorder[di].label();
- }
- // Construct the sigma array for the permutation object
- std::vector<size_t> sigma(dims.size(), 0);
- VarSet::iterator set_iterator = outVS.begin();
- for (size_t vs_i = 0; vs_i < dims.size(); ++vs_i, ++set_iterator) {
- std::vector< long >::iterator location = find(labels.begin(), labels.end(),
- set_iterator->label());
- sigma[vs_i] = location - labels.begin();
- }
+ // Construct the sigma array for the permutation object
+ std::vector<size_t> sigma(dims.size(), 0);
+ VarSet::iterator set_iterator = outVS.begin();
+ for (size_t vs_i = 0; vs_i < dims.size(); ++vs_i, ++set_iterator) {
+ std::vector< long >::iterator location = find(labels.begin(), labels.end(), set_iterator->label());
+ sigma[vs_i] = location - labels.begin();
+ }
- return Permute(dims, sigma);
+ return Permute(dims, sigma);
}
+
void SharedParameters::setPermsAndVarSetsFromVarOrders() {
- if (_varorders.size() == 0) {
- return;
- }
- FactorOrientations::const_iterator foi = _varorders.begin();
- std::vector< size_t > dims(foi->second.size());
- size_t total_dim = 1;
- for (size_t i = 0; i < dims.size(); ++i) {
- dims[i] = foi->second[i].states();
- total_dim *= dims[i];
- }
+ if (_varorders.size() == 0)
+ return;
+ FactorOrientations::const_iterator foi = _varorders.begin();
+ std::vector< size_t > dims(foi->second.size());
+ size_t total_dim = 1;
+ for (size_t i = 0; i < dims.size(); ++i) {
+ dims[i] = foi->second[i].states();
+ total_dim *= dims[i];
+ }
- // Construct the permutation objects
- for ( ; foi != _varorders.end(); ++foi) {
- VarSet vs;
- _perms[foi->first] = calculatePermutation(foi->second, dims, vs);
- _varsets[foi->first] = vs;
- }
+ // Construct the permutation objects
+ for ( ; foi != _varorders.end(); ++foi) {
+ VarSet vs;
+ _perms[foi->first] = calculatePermutation(foi->second, dims, vs);
+ _varsets[foi->first] = vs;
+ }
- if (_estimation == NULL || _estimation->probSize() != total_dim) {
- DAI_THROW(INVALID_SHARED_PARAMETERS_ORDER);
- }
+ if (_estimation == NULL || _estimation->probSize() != total_dim)
+ DAI_THROW(INVALID_SHARED_PARAMETERS_ORDER);
}
-SharedParameters::SharedParameters(std::istream& is,
- const FactorGraph& fg_varlookup)
- : _varsets(),
- _perms(),
- _varorders(),
- _estimation(NULL),
- _deleteEstimation(1)
-{
- std::string est_method;
- PropertySet props;
- is >> est_method;
- is >> props;
-
- _estimation = ParameterEstimation::construct(est_method, props);
-
- size_t num_factors;
- is >> num_factors;
- for (size_t sp_i = 0; sp_i < num_factors; ++sp_i) {
- std::string line;
- std::vector< std::string > fields;
- size_t factor;
- std::vector< Var > var_order;
- std::istringstream iss;
-
- while(line.size() == 0 && getline(is, line));
- tokenizeString(line, fields, " \t");
-
- // Lookup the factor in the factorgraph
- if (fields.size() < 1) {
- DAI_THROW(INVALID_SHARED_PARAMETERS_INPUT_LINE);
- }
- iss.str(fields[0]);
- iss >> factor;
- const VarSet& vs = fg_varlookup.factor(factor).vars();
- if (fields.size() != vs.size() + 1) {
- DAI_THROW(INVALID_SHARED_PARAMETERS_INPUT_LINE);
- }
- // Construct the vector of Vars
- for (size_t fi = 1; fi < fields.size(); ++fi) {
- // Lookup a single variable by label
- long label;
- std::istringstream labelparse(fields[fi]);
- labelparse >> label;
- VarSet::const_iterator vsi = vs.begin();
- for ( ; vsi != vs.end(); ++vsi) {
- if (vsi->label() == label) break;
- }
- if (vsi == vs.end()) {
- DAI_THROW(INVALID_SHARED_PARAMETERS_INPUT_LINE);
- }
- var_order.push_back(*vsi);
+SharedParameters::SharedParameters(std::istream& is, const FactorGraph& fg_varlookup)
+ : _varsets(), _perms(), _varorders(), _estimation(NULL), _deleteEstimation(1)
+{
+ std::string est_method;
+ PropertySet props;
+ is >> est_method;
+ is >> props;
+
+ _estimation = ParameterEstimation::construct(est_method, props);
+
+ size_t num_factors;
+ is >> num_factors;
+ for (size_t sp_i = 0; sp_i < num_factors; ++sp_i) {
+ std::string line;
+ std::vector< std::string > fields;
+ size_t factor;
+ std::vector< Var > var_order;
+ std::istringstream iss;
+
+ while(line.size() == 0 && getline(is, line))
+ ;
+ tokenizeString(line, fields, " \t");
+
+ // Lookup the factor in the factorgraph
+ if (fields.size() < 1)
+ DAI_THROW(INVALID_SHARED_PARAMETERS_INPUT_LINE);
+ iss.str(fields[0]);
+ iss >> factor;
+ const VarSet& vs = fg_varlookup.factor(factor).vars();
+ if (fields.size() != vs.size() + 1)
+ DAI_THROW(INVALID_SHARED_PARAMETERS_INPUT_LINE);
+
+ // Construct the vector of Vars
+ for (size_t fi = 1; fi < fields.size(); ++fi) {
+ // Lookup a single variable by label
+ long label;
+ std::istringstream labelparse(fields[fi]);
+ labelparse >> label;
+ VarSet::const_iterator vsi = vs.begin();
+ for ( ; vsi != vs.end(); ++vsi)
+ if (vsi->label() == label)
+ break;
+ if (vsi == vs.end())
+ DAI_THROW(INVALID_SHARED_PARAMETERS_INPUT_LINE);
+ var_order.push_back(*vsi);
+ }
+ _varorders[factor] = var_order;
}
- _varorders[factor] = var_order;
- }
- setPermsAndVarSetsFromVarOrders();
+ setPermsAndVarSetsFromVarOrders();
}
-SharedParameters::SharedParameters(const SharedParameters& sp)
- : _varsets(sp._varsets),
- _perms(sp._perms),
- _varorders(sp._varorders),
- _estimation(sp._estimation),
- _deleteEstimation(sp._deleteEstimation)
+
+SharedParameters::SharedParameters( const SharedParameters& sp )
+ : _varsets(sp._varsets), _perms(sp._perms), _varorders(sp._varorders), _estimation(sp._estimation), _deleteEstimation(sp._deleteEstimation)
{
- if (_deleteEstimation) {
- _estimation = _estimation->clone();
- }
+ if (_deleteEstimation)
+ _estimation = _estimation->clone();
}
-SharedParameters::SharedParameters(const FactorOrientations& varorders,
- ParameterEstimation* estimation)
- : _varsets(),
- _perms(),
- _varorders(varorders),
- _estimation(estimation),
- _deleteEstimation(0)
+
+SharedParameters::SharedParameters( const FactorOrientations& varorders, ParameterEstimation* estimation )
+ : _varsets(), _perms(), _varorders(varorders), _estimation(estimation), _deleteEstimation(0)
{
setPermsAndVarSetsFromVarOrders();
}
+
void SharedParameters::collectSufficientStatistics(InfAlg& alg) {
- std::map< FactorIndex, Permute >::iterator i = _perms.begin();
- for ( ; i != _perms.end(); ++i) {
- Permute& perm = i->second;
- VarSet& vs = _varsets[i->first];
-
- Factor b = alg.belief(vs);
- Prob p(b.states(), 0.0);
- for (size_t entry = 0; entry < b.states(); ++entry) {
- p[entry] = b[perm.convert_linear_index(entry)];
+ std::map< FactorIndex, Permute >::iterator i = _perms.begin();
+ for ( ; i != _perms.end(); ++i) {
+ Permute& perm = i->second;
+ VarSet& vs = _varsets[i->first];
+
+ Factor b = alg.belief(vs);
+ Prob p(b.states(), 0.0);
+ for (size_t entry = 0; entry < b.states(); ++entry)
+ p[entry] = b[perm.convert_linear_index(entry)];
+ _estimation->addSufficientStatistics(p);
}
- _estimation->addSufficientStatistics(p);
- }
}
+
void SharedParameters::setParameters(FactorGraph& fg) {
- Prob p = _estimation->estimate();
- std::map< FactorIndex, Permute >::iterator i = _perms.begin();
- for ( ; i != _perms.end(); ++i) {
- Permute& perm = i->second;
- VarSet& vs = _varsets[i->first];
-
- Factor f(vs, 0.0);
- for (size_t entry = 0; entry < f.states(); ++entry) {
- f[perm.convert_linear_index(entry)] = p[entry];
+ Prob p = _estimation->estimate();
+ std::map< FactorIndex, Permute >::iterator i = _perms.begin();
+ for ( ; i != _perms.end(); ++i) {
+ Permute& perm = i->second;
+ VarSet& vs = _varsets[i->first];
+
+ Factor f(vs, 0.0);
+ for (size_t entry = 0; entry < f.states(); ++entry)
+ f[perm.convert_linear_index(entry)] = p[entry];
+
+ fg.setFactor(i->first, f);
}
-
- fg.setFactor(i->first, f);
- }
}
-MaximizationStep::MaximizationStep (std::istream& is,
- const FactorGraph& fg_varlookup )
+
+MaximizationStep::MaximizationStep (std::istream& is, const FactorGraph& fg_varlookup )
: _params()
{
- size_t num_params = -1;
- is >> num_params;
- _params.reserve(num_params);
- for (size_t i = 0; i < num_params; ++i) {
- SharedParameters p(is, fg_varlookup);
- _params.push_back(p);
- }
+ size_t num_params = -1;
+ is >> num_params;
+ _params.reserve(num_params);
+ for (size_t i = 0; i < num_params; ++i) {
+ SharedParameters p(is, fg_varlookup);
+ _params.push_back(p);
+ }
}
void MaximizationStep::addExpectations(InfAlg& alg) {
- for (size_t i = 0; i < _params.size(); ++i) {
- _params[i].collectSufficientStatistics(alg);
- }
+ for (size_t i = 0; i < _params.size(); ++i)
+ _params[i].collectSufficientStatistics(alg);
}
+
void MaximizationStep::maximize(FactorGraph& fg) {
- for (size_t i = 0; i < _params.size(); ++i) {
- _params[i].setParameters(fg);
- }
+ for (size_t i = 0; i < _params.size(); ++i)
+ _params[i].setParameters(fg);
}
+
const std::string EMAlg::MAX_ITERS_KEY("max_iters");
const std::string EMAlg::LOG_Z_TOL_KEY("log_z_tol");
const size_t EMAlg::MAX_ITERS_DEFAULT = 30;
const Real EMAlg::LOG_Z_TOL_DEFAULT = 0.01;
+
EMAlg::EMAlg(const Evidence& evidence, InfAlg& estep, std::istream& msteps_file)
- : _evidence(evidence),
- _estep(estep),
- _msteps(),
- _iters(0),
- _lastLogZ(),
- _max_iters(MAX_ITERS_DEFAULT),
- _log_z_tol(LOG_Z_TOL_DEFAULT)
+ : _evidence(evidence), _estep(estep), _msteps(), _iters(0), _lastLogZ(), _max_iters(MAX_ITERS_DEFAULT), _log_z_tol(LOG_Z_TOL_DEFAULT)
{
- msteps_file.exceptions( std::istream::eofbit | std::istream::failbit
- | std::istream::badbit );
- size_t num_msteps = -1;
- msteps_file >> num_msteps;
- _msteps.reserve(num_msteps);
- for (size_t i = 0; i < num_msteps; ++i) {
- MaximizationStep m(msteps_file, estep.fg());
- _msteps.push_back(m);
- }
+ msteps_file.exceptions( std::istream::eofbit | std::istream::failbit | std::istream::badbit );
+ size_t num_msteps = -1;
+ msteps_file >> num_msteps;
+ _msteps.reserve(num_msteps);
+ for (size_t i = 0; i < num_msteps; ++i) {
+ MaximizationStep m(msteps_file, estep.fg());
+ _msteps.push_back(m);
+ }
}
+
void EMAlg::setTermConditions(const PropertySet* p) {
- if (NULL == p) {
- return;
- }
- if (p->hasKey(MAX_ITERS_KEY)) {
- _max_iters = p->getStringAs<size_t>(MAX_ITERS_KEY);
- }
- if (p->hasKey(LOG_Z_TOL_KEY)) {
- _log_z_tol = p->getStringAs<Real>(LOG_Z_TOL_KEY);
- }
+ if (NULL == p)
+ return;
+ if (p->hasKey(MAX_ITERS_KEY))
+ _max_iters = p->getStringAs<size_t>(MAX_ITERS_KEY);
+ if (p->hasKey(LOG_Z_TOL_KEY))
+ _log_z_tol = p->getStringAs<Real>(LOG_Z_TOL_KEY);
}
+
bool EMAlg::hasSatisfiedTermConditions() const {
- if (_iters >= _max_iters) {
- return 1;
- } else if (_lastLogZ.size() < 3) {
- // need at least 2 to calculate ratio
- // Also, throw away first iteration, as the parameters may not
- // have been normalized according to the estimation method
- return 0;
- } else {
- Real current = _lastLogZ[_lastLogZ.size() - 1];
- Real previous = _lastLogZ[_lastLogZ.size() - 2];
- if (previous == 0) return 0;
- Real diff = current - previous;
- if (diff < 0) {
- std::cerr << "Error: in EM log-likehood decreased from " << previous
- << " to " << current << std::endl;
- return 1;
+ if (_iters >= _max_iters) {
+ return 1;
+ } else if (_lastLogZ.size() < 3) {
+ // need at least 2 to calculate ratio
+ // Also, throw away first iteration, as the parameters may not
+ // have been normalized according to the estimation method
+ return 0;
+ } else {
+ Real current = _lastLogZ[_lastLogZ.size() - 1];
+ Real previous = _lastLogZ[_lastLogZ.size() - 2];
+ if (previous == 0) return 0;
+ Real diff = current - previous;
+ if (diff < 0) {
+ std::cerr << "Error: in EM log-likehood decreased from " << previous << " to " << current << std::endl;
+ return 1;
+ }
+ return diff / abs(previous) <= _log_z_tol;
}
- return diff / abs(previous) <= _log_z_tol;
- }
}
+
Real EMAlg::iterate(MaximizationStep& mstep) {
- Evidence::const_iterator e = _evidence.begin();
- Real logZ = 0;
-
- // Expectation calculation
- for ( ; e != _evidence.end(); ++e) {
- InfAlg* clamped = _estep.clone();
- e->second.applyEvidence(*clamped);
- clamped->run();
-
- logZ += clamped->logZ();
+ Evidence::const_iterator e = _evidence.begin();
+ Real logZ = 0;
- mstep.addExpectations(*clamped);
+ // Expectation calculation
+ for ( ; e != _evidence.end(); ++e) {
+ InfAlg* clamped = _estep.clone();
+ e->second.applyEvidence(*clamped);
+ clamped->run();
+
+ logZ += clamped->logZ();
- delete clamped;
- }
-
- // Maximization of parameters
- mstep.maximize(_estep.fg());
+ mstep.addExpectations(*clamped);
- return logZ;
+ delete clamped;
+ }
+
+ // Maximization of parameters
+ mstep.maximize(_estep.fg());
+
+ return logZ;
}
+
Real EMAlg::iterate() {
- Real likelihood;
- for (size_t i = 0; i < _msteps.size(); ++i) {
- likelihood = iterate(_msteps[i]);
- }
- _lastLogZ.push_back(likelihood);
- ++_iters;
- return likelihood;
+ Real likelihood;
+ for (size_t i = 0; i < _msteps.size(); ++i)
+ likelihood = iterate(_msteps[i]);
+ _lastLogZ.push_back(likelihood);
+ ++_iters;
+ return likelihood;
}
+
void EMAlg::run() {
- while(!hasSatisfiedTermConditions()) {
- iterate();
- }
+ while(!hasSatisfiedTermConditions())
+ iterate();
}
-}
+
+} // end of namespace dai
+/* Copyright (C) 2009 Charles Vaske [cvaske at soe dot ucsc dot edu]
+ University of California Santa Cruz
+
+ This file is part of libDAI.
+
+ libDAI is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ libDAI is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with libDAI; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+
#include <sstream>
#include <string>
#include <cstdlib>
#include <dai/util.h>
-
#include <dai/evidence.h>
+
namespace dai {
-void SampleData::addObservation(Var node, size_t setting) {
+
+void SampleData::addObservation( Var node, size_t setting ) {
_obs[node] = setting;
}
-
-void SampleData::applyEvidence(InfAlg& alg) const {
- std::map< Var, size_t>::const_iterator i = _obs.begin();
- for( ; i != _obs.end(); ++i) {
- alg.clamp(i->first, i->second);
- }
+
+
+void SampleData::applyEvidence( InfAlg& alg ) const {
+ std::map<Var, size_t>::const_iterator i = _obs.begin();
+ for( ; i != _obs.end(); ++i )
+ alg.clamp( i->first, i->second );
}
-void Evidence::addEvidenceTabFile(std::istream& is, FactorGraph& fg) {
- std::map< std::string, Var > varMap;
- std::vector< Var >::const_iterator v = fg.vars().begin();
- for(; v != fg.vars().end(); ++v) {
- std::stringstream s;
- s << v->label();
- varMap[s.str()] = *v;
- }
-
- addEvidenceTabFile(is, varMap);
-}
-void Evidence::addEvidenceTabFile(std::istream& is,
- std::map< std::string, Var >& varMap) {
-
- std::vector< std::string > header_fields;
- std::vector< Var > vars;
- std::string line;
- getline(is, line);
-
- // Parse header
- tokenizeString(line, header_fields);
- std::vector< std::string >::const_iterator p_field = header_fields.begin();
+void Evidence::addEvidenceTabFile( std::istream& is, FactorGraph& fg ) {
+ std::map<std::string, Var> varMap;
+ std::vector<Var>::const_iterator v = fg.vars().begin();
+ for( ; v != fg.vars().end(); ++v ) {
+ std::stringstream s;
+ s << v->label();
+ varMap[s.str()] = *v;
+ }
- if (p_field == header_fields.end()) { DAI_THROW(INVALID_EVIDENCE_LINE); }
+ addEvidenceTabFile( is, varMap );
+}
- ++p_field; // first column are sample labels
- for ( ; p_field != header_fields.end(); ++p_field) {
- std::map< std::string, Var >::iterator elem = varMap.find(*p_field);
- if (elem == varMap.end()) {
- DAI_THROW(INVALID_EVIDENCE_FILE);
- }
- vars.push_back(elem->second);
- }
-
-
- // Read samples
- while(getline(is, line)) {
- std::vector< std::string > fields;
- tokenizeString(line, fields);
-
- if (fields.size() != vars.size() + 1) { DAI_THROW(INVALID_EVIDENCE_LINE); }
+void Evidence::addEvidenceTabFile( std::istream& is, std::map<std::string, Var>& varMap ) {
+ std::vector<std::string> header_fields;
+ std::vector<Var> vars;
+ std::string line;
+ getline( is, line );
- SampleData& sampleData = _samples[fields[0]];
- sampleData.name(fields[0]); // in case of a new sample
- for (size_t i = 0; i < vars.size(); ++i) {
- if (fields[i+1].size() > 0) { // skip if missing observation
- if (fields[i+1].find_first_not_of("0123456789") != std::string::npos) {
- DAI_THROW(INVALID_EVIDENCE_OBSERVATION);
- }
- size_t state = atoi(fields[i+1].c_str());
- if (state >= vars[i].states()) {
- DAI_THROW(INVALID_EVIDENCE_OBSERVATION);
- }
- sampleData.addObservation(vars[i], state);
- }
+ // Parse header
+ tokenizeString( line, header_fields );
+ std::vector<std::string>::const_iterator p_field = header_fields.begin();
+
+ if( p_field == header_fields.end() )
+ DAI_THROW(INVALID_EVIDENCE_LINE);
+
+ ++p_field; // first column are sample labels
+ for( ; p_field != header_fields.end(); ++p_field ) {
+ std::map<std::string, Var>::iterator elem = varMap.find( *p_field );
+ if( elem == varMap.end() )
+ DAI_THROW(INVALID_EVIDENCE_FILE);
+ vars.push_back( elem->second );
}
- } // finished sample line
-}
+
+ // Read samples
+ while( getline(is, line) ) {
+ std::vector<std::string> fields;
+ tokenizeString( line, fields );
+
+ if( fields.size() != vars.size() + 1 )
+ DAI_THROW(INVALID_EVIDENCE_LINE);
+
+ SampleData& sampleData = _samples[fields[0]];
+ sampleData.name( fields[0] ); // in case of a new sample
+ for( size_t i = 0; i < vars.size(); ++i ) {
+ if( fields[i+1].size() > 0 ) { // skip if missing observation
+ if( fields[i+1].find_first_not_of("0123456789") != std::string::npos )
+ DAI_THROW(INVALID_EVIDENCE_OBSERVATION);
+ size_t state = atoi( fields[i+1].c_str() );
+ if( state >= vars[i].states() )
+ DAI_THROW(INVALID_EVIDENCE_OBSERVATION);
+ sampleData.addObservation( vars[i], state );
+ }
+ }
+ } // finished sample line
}
+
+
+} // end of namespace dai
"Impossible typecast",
"Internal error",
"Quantity not normalizable",
- "Can't parse Evidence file",
- "Can't parse Evidence line",
- "Invalid observation in Evidence file",
- "Invalid variable order in SharedParameters",
- "Variable order line in EM file is invalid",
- "Unrecognized parameter estimation method"
+ "Cannot parse Evidence file",
+ "Cannot parse Evidence line",
+ "Invalid observation in Evidence file",
+ "Invalid variable order in SharedParameters",
+ "Variable order line in EM file is invalid",
+ "Unrecognized parameter estimation method"
};
}
-
}
void tokenizeString(const std::string& s,
- std::vector<std::string>& outTokens,
- const std::string& delim)
+ std::vector<std::string>& outTokens,
+ const std::string& delim)
{
- size_t start = 0;
- while (start < s.size()) {
- size_t end = s.find_first_of(delim, start);
- if (end > s.size()) {
- end = s.size();
+ size_t start = 0;
+ while (start < s.size()) {
+ size_t end = s.find_first_of(delim, start);
+ if (end > s.size())
+ end = s.size();
+ outTokens.push_back(s.substr(start, end - start));
+ start = end + 1;
}
- outTokens.push_back(s.substr(start, end - start));
- start = end + 1;
- }
}
} // end of namespace dai
-#include<iostream>
-#include<fstream>
-#include<string>
+#include <iostream>
+#include <fstream>
+#include <string>
+
+#include <dai/factorgraph.h>
+#include <dai/evidence.h>
+#include <dai/alldai.h>
-#include<dai/factorgraph.h>
-#include<dai/evidence.h>
-#include<dai/alldai.h>
using namespace std;
using namespace dai;
-void usage(const string& msg) {
- cerr << msg << endl;
- cerr << "Usage:" << endl;
- cerr << " testem factorgraph.fg evidence.tab emconfig.em" << endl;
- exit(1);
+
+void usage( const string& msg ) {
+ cerr << msg << endl;
+ cerr << "Usage:" << endl;
+ cerr << " testem factorgraph.fg evidence.tab emconfig.em" << endl;
+ exit( 1 );
}
-int main(int argc, char** argv) {
- if (argc != 4) {
- usage("Incorrect number of arguments.");
- }
-
- FactorGraph fg;
- ifstream fgstream(argv[1]);
- fgstream >> fg;
-
- PropertySet infprops;
- infprops.Set("verbose", (size_t)1);
- infprops.Set("updates", string("HUGIN"));
- InfAlg* inf = newInfAlg("JTREE", fg, infprops);
- inf->init();
-
- Evidence e;
- ifstream estream(argv[2]);
- e.addEvidenceTabFile(estream, fg);
-
- cout << "Number of samples: " << e.nrSamples() << endl;
- Evidence::iterator ps = e.begin();
- for (; ps != e.end(); ps++) {
- cout << "Sample " << ps->first << " has "
- << ps->second.observations().size() << " observations." << endl;
- }
-
- ifstream emstream(argv[3]);
- EMAlg em(e, *inf, emstream);
-
- while(!em.hasSatisfiedTermConditions()) {
- Real l = em.iterate();
- cout << "Iteration " << em.getCurrentIters() << " likelihood: " << l <<endl;
- }
-
- cout << endl
- << "Inferred Factor Graph:" << endl
- << "######################" << endl
- << inf->fg();
-
- return 0;
+
+int main( int argc, char** argv ) {
+ if( argc != 4 )
+ usage("Incorrect number of arguments.");
+
+ FactorGraph fg;
+ ifstream fgstream( argv[1] );
+ fgstream >> fg;
+
+ PropertySet infprops;
+ infprops.Set( "verbose", (size_t)1 );
+ infprops.Set( "updates", string("HUGIN") );
+ InfAlg* inf = newInfAlg( "JTREE", fg, infprops );
+ inf->init();
+
+ Evidence e;
+ ifstream estream( argv[2] );
+ e.addEvidenceTabFile( estream, fg );
+
+ cout << "Number of samples: " << e.nrSamples() << endl;
+ Evidence::iterator ps = e.begin();
+ for( ; ps != e.end(); ps++ )
+ cout << "Sample " << ps->first << " has " << ps->second.observations().size() << " observations." << endl;
+
+ ifstream emstream( argv[3] );
+ EMAlg em(e, *inf, emstream);
+
+ while( !em.hasSatisfiedTermConditions() ) {
+ Real l = em.iterate();
+ cout << "Iteration " << em.getCurrentIters() << " likelihood: " << l <<endl;
+ }
+
+ cout << endl << "Inferred Factor Graph:" << endl << "######################" << endl << inf->fg();
+
+ return 0;
}