1 /* This file is part of libDAI - http://www.libdai.org/
3 * libDAI is licensed under the terms of the GNU General Public License version
4 * 2, or (at your option) any later version. libDAI is distributed without any
5 * warranty. See the file COPYING for more details.
7 * Copyright (C) 2009 Charles Vaske [cvaske at soe dot ucsc dot edu]
8 * Copyright (C) 2009 University of California, Santa Cruz
12 #ifndef __defined_libdai_emalg_h
13 #define __defined_libdai_emalg_h
19 #include <dai/factor.h>
20 #include <dai/daialg.h>
21 #include <dai/evidence.h>
22 #include <dai/index.h>
23 #include <dai/properties.h>
27 /// \brief Defines classes related to Expectation Maximization (EMAlg, ParameterEstimation, CondProbEstimation and SharedParameters)
33 /// Base class for parameter estimation methods.
34 /** This class defines the general interface of parameter estimation methods.
36 * Implementations of this interface (see e.g. CondProbEstimation) should
37 * register a factory function (virtual constructor) via the static
38 * registerMethod() function.
39 * This factory function should return a pointer to a newly constructed
40 * object, whose type is a subclass of ParameterEstimation, and gets as
41 * input a PropertySet of parameters. After a subclass has been registered,
42 * instances of it can be constructed using the construct() method.
44 * Implementations are responsible for collecting data from a probability
45 * vector passed to it from a SharedParameters container object.
47 * The default registry only contains CondProbEstimation, named
48 * "ConditionalProbEstimation".
50 * \author Charles Vaske
52 class ParameterEstimation
{
54 /// Type of pointer to factory function.
55 typedef ParameterEstimation
* (*ParamEstFactory
)( const PropertySet
& );
57 /// Virtual destructor for deleting pointers to derived classes.
58 virtual ~ParameterEstimation() {}
60 /// Virtual copy constructor.
61 virtual ParameterEstimation
* clone() const = 0;
63 /// General factory method that constructs the desired ParameterEstimation subclass
64 /** \param method Name of the subclass that should be constructed;
65 * \param p Parameters passed to constructor of subclass.
66 * \note \a method should either be in the default registry or should be registered first using registerMethod().
67 * \throw UNKNOWN_PARAMETER_ESTIMATION_METHOD if the requested \a method is not registered.
69 static ParameterEstimation
* construct( const std::string
&method
, const PropertySet
&p
);
71 /// Register a subclass so that it can be used with construct().
72 static void registerMethod( const std::string
&method
, const ParamEstFactory
&f
) {
73 if( _registry
== NULL
)
74 loadDefaultRegistry();
75 (*_registry
)[method
] = f
;
78 /// Estimate the factor using the accumulated sufficient statistics and reset.
79 virtual Prob
estimate() = 0;
81 /// Accumulate the sufficient statistics for \a p.
82 virtual void addSufficientStatistics( const Prob
&p
) = 0;
84 /// Returns the size of the Prob that should be passed to addSufficientStatistics.
85 virtual size_t probSize() const = 0;
88 /// A static registry containing all methods registered so far.
89 static std::map
<std::string
, ParamEstFactory
> *_registry
;
91 /// Registers default ParameterEstimation subclasses (currently, only CondProbEstimation).
92 static void loadDefaultRegistry();
96 /// Estimates the parameters of a conditional probability table, using pseudocounts.
97 /** \author Charles Vaske
99 class CondProbEstimation
: private ParameterEstimation
{
101 /// Number of states of the variable of interest
103 /// Current pseudocounts
105 /// Initial pseudocounts
110 /** For a conditional probability \f$ P( X | Y ) \f$,
111 * \param target_dimension should equal \f$ | X | \f$
112 * \param pseudocounts are the initial pseudocounts, of length \f$ |X| \cdot |Y| \f$
114 CondProbEstimation( size_t target_dimension
, const Prob
&pseudocounts
);
116 /// Virtual constructor, using a PropertySet.
117 /** Some keys in the PropertySet are required.
118 * For a conditional probability \f$ P( X | Y ) \f$,
119 * - \a target_dimension should be equal to \f$ | X | \f$
120 * - \a total_dimension should be equal to \f$ |X| \cdot |Y| \f$
122 * An optional key is:
123 * - \a pseudo_count which specifies the initial counts (defaults to 1)
125 static ParameterEstimation
* factory( const PropertySet
&p
);
127 /// Virtual copy constructor
128 virtual ParameterEstimation
* clone() const { return new CondProbEstimation( _target_dim
, _initial_stats
); }
130 /// Virtual destructor
131 virtual ~CondProbEstimation() {}
133 /// Returns an estimate of the conditional probability distribution.
134 /** The format of the resulting Prob keeps all the values for
135 * \f$ P(X | Y=y) \f$ in sequential order in the array.
137 virtual Prob
estimate();
139 /// Accumulate sufficient statistics from the expectations in \a p
140 virtual void addSufficientStatistics( const Prob
&p
);
142 /// Returns the required size for arguments to addSufficientStatistics().
143 virtual size_t probSize() const { return _stats
.size(); }
147 /// Represents a single factor or set of factors whose parameters should be estimated.
148 /** To ensure that parameters can be shared between different factors during
149 * EM learning, each factor's values are reordered to match a desired variable
150 * ordering. The ordering of the variables in a factor may therefore differ
151 * from the canonical ordering used in libDAI. The SharedParameters
152 * class combines one or more factors (together with the specified orderings
153 * of the variables) with a ParameterEstimation object, taking care of the
154 * necessary permutations of the factor entries / parameters.
156 * \author Charles Vaske
158 class SharedParameters
{
160 /// Convenience label for an index of a factor in a FactorGraph.
161 typedef size_t FactorIndex
;
162 /// Convenience label for a grouping of factor orientations.
163 typedef std::map
<FactorIndex
, std::vector
<Var
> > FactorOrientations
;
166 /// Maps factor indices to the corresponding VarSets
167 std::map
<FactorIndex
, VarSet
> _varsets
;
168 /// Maps factor indices to the corresponding Permute objects that permute the canonical ordering into the desired ordering
169 std::map
<FactorIndex
, Permute
> _perms
;
170 /// Maps factor indices to the corresponding desired variable orderings
171 FactorOrientations _varorders
;
172 /// Parameter estimation method to be used
173 ParameterEstimation
*_estimation
;
174 /// Indicates whether \c *this gets ownership of _estimation
177 /// Calculates the permutation that permutes the canonical ordering into the desired ordering
178 /** \param varOrder Desired ordering of variables
179 * \param outVS Contains variables in \a varOrder represented as a VarSet
180 * \return Permute object for permuting variables in varOrder from the canonical libDAI ordering into the desired ordering
182 static Permute
calculatePermutation( const std::vector
<Var
> &varOrder
, VarSet
&outVS
);
184 /// Initializes _varsets and _perms from _varorders and checks whether their state spaces correspond with _estimation.probSize()
185 void setPermsAndVarSetsFromVarOrders();
189 /** \param varorders all the factor orientations for this parameter
190 * \param estimation a pointer to the parameter estimation method
191 * \param ownPE whether the constructed object gets ownership of \a estimation
193 SharedParameters( const FactorOrientations
&varorders
, ParameterEstimation
*estimation
, bool ownPE
=false );
195 /// Construct a SharedParameters object from an input stream \a is and a factor graph \a fg
196 /** \see \ref fileformats-emalg-sharedparameters
197 * \throw INVALID_EMALG_FILE if the input stream is not valid
199 SharedParameters( std::istream
&is
, const FactorGraph
&fg
);
202 SharedParameters( const SharedParameters
&sp
) : _varsets(sp
._varsets
), _perms(sp
._perms
), _varorders(sp
._varorders
), _estimation(sp
._estimation
), _ownEstimation(sp
._ownEstimation
) {
203 // If sp owns its _estimation object, we should clone it instead of copying the pointer
205 _estimation
= _estimation
->clone();
209 ~SharedParameters() {
210 // If we own the _estimation object, we should delete it now
215 /// Collect the sufficient statistics from expected values (beliefs) according to \a alg
216 /** For each of the relevant factors (that shares the parameters we are interested in),
217 * the corresponding belief according to \a alg is obtained and its entries are permuted
218 * such that their ordering corresponds with the shared parameters that we are estimating.
219 * Then, the parameter estimation subclass method addSufficientStatistics() is called with
220 * this vector of expected values of the parameters as input.
222 void collectSufficientStatistics( InfAlg
&alg
);
224 /// Estimate and set the shared parameters
225 /** Based on the sufficient statistics collected so far, the shared parameters are estimated
226 * using the parameter estimation subclass method estimate(). Then, each of the relevant
227 * factors in \a fg (that shares the parameters we are interested in) is set according
228 * to those parameters (permuting the parameters accordingly).
230 void setParameters( FactorGraph
&fg
);
234 /// A MaximizationStep groups together several parameter estimation tasks (SharedParameters objects) into a single unit.
235 /** \author Charles Vaske
237 class MaximizationStep
{
239 /// Vector of parameter estimation tasks of which this maximization step consists
240 std::vector
<SharedParameters
> _params
;
243 /// Default constructor
244 MaximizationStep() : _params() {}
246 /// Construct MaximizationStep from a vector of parameter estimation tasks
247 MaximizationStep( std::vector
<SharedParameters
> &maximizations
) : _params(maximizations
) {}
249 /// Constructor from an input stream and a corresponding factor graph
250 /** \see \ref fileformats-emalg-maximizationstep
252 MaximizationStep( std::istream
&is
, const FactorGraph
&fg_varlookup
);
254 /// Collect the beliefs from this InfAlg as expectations for the next Maximization step
255 void addExpectations( InfAlg
&alg
);
257 /// Using all of the currently added expectations, make new factors with maximized parameters and set them in the FactorGraph.
258 void maximize( FactorGraph
&fg
);
260 /// \name Iterator interface
262 /// Iterator over the parameter estimation tasks
263 typedef std::vector
<SharedParameters
>::iterator iterator
;
264 /// Constant iterator over the parameter estimation tasks
265 typedef std::vector
<SharedParameters
>::const_iterator const_iterator
;
267 /// Returns iterator that points to the first parameter estimation task
268 iterator
begin() { return _params
.begin(); }
269 /// Returns constant iterator that points to the first parameter estimation task
270 const_iterator
begin() const { return _params
.begin(); }
271 /// Returns iterator that points beyond the last parameter estimation task
272 iterator
end() { return _params
.end(); }
273 /// Returns constant iterator that points beyond the last parameter estimation task
274 const_iterator
end() const { return _params
.end(); }
279 /// EMAlg performs Expectation Maximization to learn factor parameters.
280 /** This requires specifying:
281 * - Evidence (instances of observations from the graphical model),
282 * - InfAlg for performing the E-step (which includes the factor graph),
283 * - a vector of MaximizationStep 's steps to be performed.
285 * This implementation can perform incremental EM by using multiple
286 * MaximizationSteps. An expectation step is performed between execution
287 * of each MaximizationStep. A call to iterate() will cycle through all
288 * MaximizationStep 's. A call to run() will call iterate() until the
289 * termination criteria have been met.
291 * Having multiple and separate maximization steps allows for maximizing some
292 * parameters, performing another E-step, and then maximizing separate
293 * parameters, which may result in faster convergence in some cases.
295 * \author Charles Vaske
299 /// All the data samples used during learning
300 const Evidence
&_evidence
;
302 /// How to do the expectation step
305 /// The maximization steps to take
306 std::vector
<MaximizationStep
> _msteps
;
308 /// Number of iterations done
311 /// History of likelihoods
312 std::vector
<Real
> _lastLogZ
;
314 /// Maximum number of iterations
317 /// Convergence tolerance
321 /// Key for setting maximum iterations
322 static const std::string MAX_ITERS_KEY
;
323 /// Default maximum iterations
324 static const size_t MAX_ITERS_DEFAULT
;
325 /// Key for setting likelihood termination condition
326 static const std::string LOG_Z_TOL_KEY
;
327 /// Default likelihood tolerance
328 static const Real LOG_Z_TOL_DEFAULT
;
330 /// Construct an EMAlg from several objects
331 /** \param evidence Specifies the observed evidence
332 * \param estep Inference algorithm to be used for the E-step
333 * \param msteps Vector of maximization steps, each of which is a group of parameter estimation tasks
334 * \param termconditions Termination conditions @see setTermConditions()
336 EMAlg( const Evidence
&evidence
, InfAlg
&estep
, std::vector
<MaximizationStep
> &msteps
, const PropertySet
&termconditions
)
337 : _evidence(evidence
), _estep(estep
), _msteps(msteps
), _iters(0), _lastLogZ(), _max_iters(MAX_ITERS_DEFAULT
), _log_z_tol(LOG_Z_TOL_DEFAULT
)
339 setTermConditions( termconditions
);
342 /// Construct an EMAlg from Evidence \a evidence, an InfAlg \a estep, and an input stream \a mstep_file
343 /** \see \ref fileformats-emalg
345 EMAlg( const Evidence
&evidence
, InfAlg
&estep
, std::istream
&mstep_file
);
347 /// Change the conditions for termination
348 /** There are two possible parameters in the PropertySet \a p:
349 * - \a max_iters maximum number of iterations
350 * - \a log_z_tol critical proportion of increase in logZ
352 * \see hasSatisifiedTermConditions()
354 void setTermConditions( const PropertySet
&p
);
356 /// Determine if the termination conditions have been met.
357 /** There are two sufficient termination conditions:
358 * -# the maximum number of iterations has been performed
359 * -# the ratio of logZ increase over previous logZ is less than the
361 * \f$ \frac{\log(Z_t) - \log(Z_{t-1})}{| \log(Z_{t-1}) | } < \mathrm{tol} \f$.
363 bool hasSatisfiedTermConditions() const;
365 /// Return the last calculated log likelihood
366 Real
logZ() const { return _lastLogZ
.back(); }
368 /// Returns number of iterations done so far
369 size_t Iterations() const { return _iters
; }
371 /// Get the E-step method used
372 const InfAlg
& eStep() const { return _estep
; }
374 /// Iterate once over all maximization steps
375 /** \return Log-likelihood after iteration
379 /// Iterate over a single MaximizationStep
380 Real
iterate( MaximizationStep
&mstep
);
382 /// Iterate until termination conditions are satisfied
385 /// \name Iterator interface
387 /// Iterator over the maximization steps
388 typedef std::vector
<MaximizationStep
>::iterator s_iterator
;
389 /// Constant iterator over the maximization steps
390 typedef std::vector
<MaximizationStep
>::const_iterator const_s_iterator
;
392 /// Returns iterator that points to the first maximization step
393 s_iterator
s_begin() { return _msteps
.begin(); }
394 /// Returns constant iterator that points to the first maximization step
395 const_s_iterator
s_begin() const { return _msteps
.begin(); }
396 /// Returns iterator that points beyond the last maximization step
397 s_iterator
s_end() { return _msteps
.end(); }
398 /// Returns constant iterator that points beyond the last maximization step
399 const_s_iterator
s_end() const { return _msteps
.end(); }
404 } // end of namespace dai