1 /* This file is part of libDAI - http://www.libdai.org/
2 *
4 * 2, or (at your option) any later version. libDAI is distributed without any
5 * warranty. See the file COPYING for more details.
6 *
7 * Copyright (C) 2002 Martijn Leisink [martijn@mbfys.kun.nl]
8 * Copyright (C) 2006-2009 Joris Mooij [joris dot mooij at libdai dot org]
10 */
13 /// \file
14 /// \brief Defines TFactor<> and Factor classes which represent factors in probability distributions.
17 #ifndef __defined_libdai_factor_h
18 #define __defined_libdai_factor_h
21 #include <iostream>
22 #include <functional>
23 #include <cmath>
24 #include <dai/prob.h>
25 #include <dai/varset.h>
26 #include <dai/index.h>
27 #include <dai/util.h>
30 namespace dai {
33 /// Represents a (probability) factor.
34 /** Mathematically, a \e factor is a function mapping joint states of some
35 * variables to the nonnegative real numbers.
36 * More formally, denoting a discrete variable with label \f$l\f$ by
37 * \f$x_l\f$ and its state space by \f$X_l = \{0,1,\dots,S_l-1\}\f$,
38 * a factor depending on the variables \f$\{x_l\}_{l\in L}\f$ is
39 * a function \f$f_L : \prod_{l\in L} X_l \to [0,\infty)\f$.
40 *
41 * In libDAI, a factor is represented by a TFactor<T> object, which has two
42 * components:
43 * \arg a VarSet, corresponding with the set of variables \f$\{x_l\}_{l\in L}\f$
44 * that the factor depends on;
45 * \arg a TProb, a vector containing the value of the factor for each possible
46 * joint state of the variables.
47 *
48 * The factor values are stored in the entries of the TProb in a particular
49 * ordering, which is defined by the one-to-one correspondence of a joint state
50 * in \f$\prod_{l\in L} X_l\f$ with a linear index in
51 * \f$\{0,1,\dots,\prod_{l\in L} S_l-1\}\f$ according to the mapping \f$\sigma\f$
52 * induced by dai::calcLinearState().
53 *
54 * \tparam T Should be a scalar that is castable from and to double and should support elementary arithmetic operations.
55 * \todo Define a better fileformat for .fg files (maybe using XML)?
56 * \todo Add support for sparse factors.
57 */
58 template <typename T> class TFactor {
59 private:
60 /// Stores the variables on which the factor depends
61 VarSet _vs;
62 /// Stores the factor values
63 TProb<T> _p;
65 public:
66 /// \name Constructors and destructors
67 //@{
68 /// Constructs factor depending on no variables with value \a p
69 TFactor ( T p = 1 ) : _vs(), _p(1,p) {}
71 /// Constructs factor depending on the variable \a v with uniform distribution
72 TFactor( const Var &v ) : _vs(v), _p(v.states()) {}
74 /// Constructs factor depending on variables in \a vars with uniform distribution
75 TFactor( const VarSet& vars ) : _vs(vars), _p(_vs.nrStates()) {}
77 /// Constructs factor depending on variables in \a vars with all values set to \a p
78 TFactor( const VarSet& vars, T p ) : _vs(vars), _p(_vs.nrStates(),p) {}
80 /// Constructs factor depending on variables in \a vars, copying the values from a std::vector<>
81 /** \tparam S Type of values of \a x
82 * \param vars contains the variables that the new factor should depend on.
83 * \param x Vector with values to be copied.
84 */
85 template<typename S>
86 TFactor( const VarSet& vars, const std::vector<S> &x ) : _vs(vars), _p(x.begin(), x.begin() + _vs.nrStates(), _vs.nrStates()) {
87 DAI_ASSERT( x.size() == vars.nrStates() );
88 }
90 /// Constructs factor depending on variables in \a vars, copying the values from an array
91 /** \param vars contains the variables that the new factor should depend on.
92 * \param p Points to array of values to be added.
93 */
94 TFactor( const VarSet& vars, const T* p ) : _vs(vars), _p(p, p + _vs.nrStates(), _vs.nrStates()) {}
96 /// Constructs factor depending on variables in \a vars, copying the values from \a p
97 TFactor( const VarSet& vars, const TProb<T> &p ) : _vs(vars), _p(p) {
98 DAI_DEBASSERT( _vs.nrStates() == _p.size() );
99 }
101 /// Constructs factor depending on variables in \a vars, permuting the values given in \a p accordingly
102 TFactor( const std::vector<Var> &vars, const std::vector<T> &p ) : _vs(vars.begin(), vars.end(), vars.size()), _p(p.size()) {
103 Permute permindex(vars);
104 for( size_t li = 0; li < p.size(); ++li )
105 _p[permindex.convertLinearIndex(li)] = p[li];
106 }
107 //@}
109 /// \name Queries
110 //@{
111 /// Returns constant reference to value vector
112 const TProb<T>& p() const { return _p; }
114 /// Returns reference to value vector
115 TProb<T>& p() { return _p; }
117 /// Returns a copy of the \a i 'th entry of the value vector
118 T operator[] (size_t i) const { return _p[i]; }
120 /// Returns a reference to the \a i 'th entry of the value vector
121 T& operator[] (size_t i) { return _p[i]; }
123 /// Returns constant reference to variable set (i.e., the variables on which the factor depends)
124 const VarSet& vars() const { return _vs; }
126 /// Returns reference to variable set (i.e., the variables on which the factor depends)
127 VarSet& vars() { return _vs; }
129 /// Returns the number of possible joint states of the variables on which the factor depends, \f$\prod_{l\in L} S_l\f$
130 /** \note This is equal to the length of the value vector.
131 */
132 size_t states() const { return _p.size(); }
134 /// Returns the Shannon entropy of \c *this, \f$-\sum_i p_i \log p_i\f$
135 T entropy() const { return _p.entropy(); }
137 /// Returns maximum of all values
138 T max() const { return _p.max(); }
140 /// Returns minimum of all values
141 T min() const { return _p.min(); }
143 /// Returns sum of all values
144 T sum() const { return _p.sum(); }
146 /// Returns maximum absolute value of all values
147 T maxAbs() const { return _p.maxAbs(); }
149 /// Returns \c true if one or more values are NaN
150 bool hasNaNs() const { return _p.hasNaNs(); }
152 /// Returns \c true if one or more values are negative
153 bool hasNegatives() const { return _p.hasNegatives(); }
155 /// Returns strength of this factor (between variables \a i and \a j), as defined in eq. (52) of [\ref MoK07b]
156 T strength( const Var &i, const Var &j ) const;
157 //@}
159 /// \name Unary transformations
160 //@{
161 /// Returns pointwise absolute value
162 TFactor<T> abs() const {
163 // Note: the alternative (shorter) way of implementing this,
164 // return TFactor<T>( _vs, _p.abs() );
165 // is slower because it invokes the copy constructor of TProb<T>
166 TFactor<T> x;
167 x._vs = _vs;
168 x._p = _p.abs();
169 return x;
170 }
172 /// Returns pointwise exponent
173 TFactor<T> exp() const {
174 TFactor<T> x;
175 x._vs = _vs;
176 x._p = _p.exp();
177 return x;
178 }
180 /// Returns pointwise logarithm
181 /** If \a zero == \c true, uses <tt>log(0)==0</tt>; otherwise, <tt>log(0)==-Inf</tt>.
182 */
183 TFactor<T> log(bool zero=false) const {
184 TFactor<T> x;
185 x._vs = _vs;
186 x._p = _p.log(zero);
187 return x;
188 }
190 /// Returns pointwise inverse
191 /** If \a zero == \c true, uses <tt>1/0==0</tt>; otherwise, <tt>1/0==Inf</tt>.
192 */
193 TFactor<T> inverse(bool zero=true) const {
194 TFactor<T> x;
195 x._vs = _vs;
196 x._p = _p.inverse(zero);
197 return x;
198 }
200 /// Returns normalized copy of \c *this, using the specified norm
201 /** \throw NOT_NORMALIZABLE if the norm is zero
202 */
203 TFactor<T> normalized( typename TProb<T>::NormType norm=TProb<T>::NORMPROB ) const {
204 TFactor<T> x;
205 x._vs = _vs;
206 x._p = _p.normalized( norm );
207 return x;
208 }
209 //@}
211 /// \name Unary operations
212 //@{
213 /// Draws all values i.i.d. from a uniform distribution on [0,1)
214 TFactor<T> & randomize () { _p.randomize(); return *this; }
216 /// Sets all values to \f$1/n\f$ where \a n is the number of states
217 TFactor<T>& setUniform () { _p.setUniform(); return *this; }
219 /// Normalizes factor using the specified norm
220 /** \throw NOT_NORMALIZABLE if the norm is zero
221 */
222 T normalize( typename TProb<T>::NormType norm=TProb<T>::NORMPROB ) { return _p.normalize( norm ); }
223 //@}
225 /// \name Operations with scalars
226 //@{
227 /// Sets all values to \a x
228 TFactor<T> & fill (T x) { _p.fill( x ); return *this; }
230 /// Adds scalar \a x to each value
231 TFactor<T>& operator+= (T x) { _p += x; return *this; }
233 /// Subtracts scalar \a x from each value
234 TFactor<T>& operator-= (T x) { _p -= x; return *this; }
236 /// Multiplies each value with scalar \a x
237 TFactor<T>& operator*= (T x) { _p *= x; return *this; }
239 /// Divides each entry by scalar \a x
240 TFactor<T>& operator/= (T x) { _p /= x; return *this; }
242 /// Raises values to the power \a x
243 TFactor<T>& operator^= (T x) { _p ^= x; return *this; }
244 //@}
246 /// \name Transformations with scalars
247 //@{
248 /// Returns sum of \c *this and scalar \a x
249 TFactor<T> operator+ (T x) const {
250 // Note: the alternative (shorter) way of implementing this,
251 // TFactor<T> result(*this);
252 // result._p += x;
253 // is slower because it invokes the copy constructor of TFactor<T>
254 TFactor<T> result;
255 result._vs = _vs;
256 result._p = p() + x;
257 return result;
258 }
260 /// Returns difference of \c *this and scalar \a x
261 TFactor<T> operator- (T x) const {
262 TFactor<T> result;
263 result._vs = _vs;
264 result._p = p() - x;
265 return result;
266 }
268 /// Returns product of \c *this with scalar \a x
269 TFactor<T> operator* (T x) const {
270 TFactor<T> result;
271 result._vs = _vs;
272 result._p = p() * x;
273 return result;
274 }
276 /// Returns quotient of \c *this with scalar \a x
277 TFactor<T> operator/ (T x) const {
278 TFactor<T> result;
279 result._vs = _vs;
280 result._p = p() / x;
281 return result;
282 }
284 /// Returns \c *this raised to the power \a x
285 TFactor<T> operator^ (T x) const {
286 TFactor<T> result;
287 result._vs = _vs;
288 result._p = p() ^ x;
289 return result;
290 }
291 //@}
293 /// \name Operations with other factors
294 //@{
295 /// Applies binary operation \a op on two factors, \c *this and \a g
296 /** \tparam binOp Type of function object that accepts two arguments of type \a T and outputs a type \a T
297 * \param g Right operand
298 * \param op Operation of type \a binOp
299 */
300 template<typename binOp> TFactor<T>& binaryOp( const TFactor<T> &g, binOp op ) {
301 if( _vs == g._vs ) // optimize special case
302 _p.pwBinaryOp( g._p, op );
303 else {
304 TFactor<T> f(*this); // make a copy
305 _vs |= g._vs;
306 size_t N = _vs.nrStates();
308 IndexFor i_f( f._vs, _vs );
309 IndexFor i_g( g._vs, _vs );
311 _p.p().clear();
312 _p.p().reserve( N );
313 for( size_t i = 0; i < N; i++, ++i_f, ++i_g )
314 _p.p().push_back( op( f._p[i_f], g._p[i_g] ) );
315 }
316 return *this;
317 }
319 /// Adds \a g to \c *this
320 /** The sum of two factors is defined as follows: if
321 * \f$f : \prod_{l\in L} X_l \to [0,\infty)\f$ and \f$g : \prod_{m\in M} X_m \to [0,\infty)\f$, then
322 * \f[f+g : \prod_{l\in L\cup M} X_l \to [0,\infty) : x \mapsto f(x_L) + g(x_M).\f]
323 */
324 TFactor<T>& operator+= (const TFactor<T>& g) { return binaryOp( g, std::plus<T>() ); }
326 /// Subtracts \a g from \c *this
327 /** The difference of two factors is defined as follows: if
328 * \f$f : \prod_{l\in L} X_l \to [0,\infty)\f$ and \f$g : \prod_{m\in M} X_m \to [0,\infty)\f$, then
329 * \f[f-g : \prod_{l\in L\cup M} X_l \to [0,\infty) : x \mapsto f(x_L) - g(x_M).\f]
330 */
331 TFactor<T>& operator-= (const TFactor<T>& g) { return binaryOp( g, std::minus<T>() ); }
333 /// Multiplies \c *this with \a g
334 /** The product of two factors is defined as follows: if
335 * \f$f : \prod_{l\in L} X_l \to [0,\infty)\f$ and \f$g : \prod_{m\in M} X_m \to [0,\infty)\f$, then
336 * \f[fg : \prod_{l\in L\cup M} X_l \to [0,\infty) : x \mapsto f(x_L) g(x_M).\f]
337 */
338 TFactor<T>& operator*= (const TFactor<T>& g) { return binaryOp( g, std::multiplies<T>() ); }
340 /// Divides \c *this by \a g (where division by zero yields zero)
341 /** The quotient of two factors is defined as follows: if
342 * \f$f : \prod_{l\in L} X_l \to [0,\infty)\f$ and \f$g : \prod_{m\in M} X_m \to [0,\infty)\f$, then
343 * \f[\frac{f}{g} : \prod_{l\in L\cup M} X_l \to [0,\infty) : x \mapsto \frac{f(x_L)}{g(x_M)}.\f]
344 */
345 TFactor<T>& operator/= (const TFactor<T>& g) { return binaryOp( g, fo_divides0<T>() ); }
346 //@}
348 /// \name Transformations with other factors
349 //@{
350 /// Returns result of applying binary operation \a op on two factors, \c *this and \a g
351 /** \tparam binOp Type of function object that accepts two arguments of type \a T and outputs a type \a T
352 * \param g Right operand
353 * \param op Operation of type \a binOp
354 */
355 template<typename binOp> TFactor<T> binaryTr( const TFactor<T> &g, binOp op ) const {
356 // Note that to prevent a copy to be made, it is crucial
357 // that the result is declared outside the if-else construct.
358 TFactor<T> result;
359 if( _vs == g._vs ) { // optimize special case
360 result._vs = _vs;
361 result._p = _p.pwBinaryTr( g._p, op );
362 } else {
363 result._vs = _vs | g._vs;
364 size_t N = result._vs.nrStates();
366 IndexFor i_f( _vs, result.vars() );
367 IndexFor i_g( g._vs, result.vars() );
369 result._p.p().clear();
370 result._p.p().reserve( N );
371 for( size_t i = 0; i < N; i++, ++i_f, ++i_g )
372 result._p.p().push_back( op( _p[i_f], g._p[i_g] ) );
373 }
374 return result;
375 }
377 /// Returns sum of \c *this and \a g
378 /** The sum of two factors is defined as follows: if
379 * \f$f : \prod_{l\in L} X_l \to [0,\infty)\f$ and \f$g : \prod_{m\in M} X_m \to [0,\infty)\f$, then
380 * \f[f+g : \prod_{l\in L\cup M} X_l \to [0,\infty) : x \mapsto f(x_L) + g(x_M).\f]
381 */
382 TFactor<T> operator+ (const TFactor<T>& g) const {
383 return binaryTr(g,std::plus<T>());
384 }
386 /// Returns \c *this minus \a g
387 /** The difference of two factors is defined as follows: if
388 * \f$f : \prod_{l\in L} X_l \to [0,\infty)\f$ and \f$g : \prod_{m\in M} X_m \to [0,\infty)\f$, then
389 * \f[f-g : \prod_{l\in L\cup M} X_l \to [0,\infty) : x \mapsto f(x_L) - g(x_M).\f]
390 */
391 TFactor<T> operator- (const TFactor<T>& g) const {
392 return binaryTr(g,std::minus<T>());
393 }
395 /// Returns product of \c *this with \a g
396 /** The product of two factors is defined as follows: if
397 * \f$f : \prod_{l\in L} X_l \to [0,\infty)\f$ and \f$g : \prod_{m\in M} X_m \to [0,\infty)\f$, then
398 * \f[fg : \prod_{l\in L\cup M} X_l \to [0,\infty) : x \mapsto f(x_L) g(x_M).\f]
399 */
400 TFactor<T> operator* (const TFactor<T>& g) const {
401 return binaryTr(g,std::multiplies<T>());
402 }
404 /// Returns quotient of \c *this by \a f (where division by zero yields zero)
405 /** The quotient of two factors is defined as follows: if
406 * \f$f : \prod_{l\in L} X_l \to [0,\infty)\f$ and \f$g : \prod_{m\in M} X_m \to [0,\infty)\f$, then
407 * \f[\frac{f}{g} : \prod_{l\in L\cup M} X_l \to [0,\infty) : x \mapsto \frac{f(x_L)}{g(x_M)}.\f]
408 */
409 TFactor<T> operator/ (const TFactor<T>& g) const {
410 return binaryTr(g,fo_divides0<T>());
411 }
412 //@}
414 /// \name Miscellaneous operations
415 //@{
416 /// Returns a slice of \c *this, where the subset \a vars is in state \a varsState
417 /** \pre \a vars sould be a subset of vars()
418 * \pre \a varsState < vars.states()
419 *
420 * The result is a factor that depends on the variables of *this except those in \a vars,
421 * obtained by setting the variables in \a vars to the joint state specified by the linear index
422 * \a varsState. Formally, if \c *this corresponds with the factor \f$f : \prod_{l\in L} X_l \to [0,\infty)\f$,
423 * \f$M \subset L\f$ corresponds with \a vars and \a varsState corresponds with a mapping \f$s\f$ that
424 * maps a variable \f$x_m\f$ with \f$m\in M\f$ to its state \f$s(x_m) \in X_m\f$, then the slice
425 * returned corresponds with the factor \f$g : \prod_{l \in L \setminus M} X_l \to [0,\infty)\f$
426 * defined by \f$g(\{x_l\}_{l\in L \setminus M}) = f(\{x_l\}_{l\in L \setminus M}, \{s(x_m)\}_{m\in M})\f$.
427 */
428 TFactor<T> slice( const VarSet& vars, size_t varsState ) const;
430 /// Embeds this factor in a larger VarSet
431 /** \pre vars() should be a subset of \a vars
432 *
433 * If *this corresponds with \f$f : \prod_{l\in L} X_l \to [0,\infty)\f$ and \f$L \subset M\f$, then
434 * the embedded factor corresponds with \f$g : \prod_{m\in M} X_m \to [0,\infty) : x \mapsto f(x_L)\f$.
435 */
436 TFactor<T> embed(const VarSet & vars) const {
437 DAI_ASSERT( vars >> _vs );
438 if( _vs == vars )
439 return *this;
440 else
441 return (*this) * TFactor<T>(vars / _vs, (T)1);
442 }
444 /// Returns marginal on \a vars, obtained by summing out all variables except those in \a vars, and normalizing the result if \a normed == \c true
445 TFactor<T> marginal(const VarSet &vars, bool normed=true) const;
447 /// Returns max-marginal on \a vars, obtained by maximizing all variables except those in \a vars, and normalizing the result if \a normed == \c true
448 TFactor<T> maxMarginal(const VarSet &vars, bool normed=true) const;
449 //@}
450 };
453 template<typename T> TFactor<T> TFactor<T>::slice( const VarSet& vars, size_t varsState ) const {
454 DAI_ASSERT( vars << _vs );
455 VarSet varsrem = _vs / vars;
456 TFactor<T> result( varsrem, T(0) );
458 // OPTIMIZE ME
459 IndexFor i_vars (vars, _vs);
460 IndexFor i_varsrem (varsrem, _vs);
461 for( size_t i = 0; i < states(); i++, ++i_vars, ++i_varsrem )
462 if( (size_t)i_vars == varsState )
463 result._p[i_varsrem] = _p[i];
465 return result;
466 }
469 template<typename T> TFactor<T> TFactor<T>::marginal(const VarSet &vars, bool normed) const {
470 VarSet res_vars = vars & _vs;
472 TFactor<T> res( res_vars, 0.0 );
474 IndexFor i_res( res_vars, _vs );
475 for( size_t i = 0; i < _p.size(); i++, ++i_res )
476 res._p[i_res] += _p[i];
478 if( normed )
479 res.normalize( TProb<T>::NORMPROB );
481 return res;
482 }
485 template<typename T> TFactor<T> TFactor<T>::maxMarginal(const VarSet &vars, bool normed) const {
486 VarSet res_vars = vars & _vs;
488 TFactor<T> res( res_vars, 0.0 );
490 IndexFor i_res( res_vars, _vs );
491 for( size_t i = 0; i < _p.size(); i++, ++i_res )
492 if( _p[i] > res._p[i_res] )
493 res._p[i_res] = _p[i];
495 if( normed )
496 res.normalize( TProb<T>::NORMPROB );
498 return res;
499 }
502 template<typename T> T TFactor<T>::strength( const Var &i, const Var &j ) const {
503 DAI_DEBASSERT( _vs.contains( i ) );
504 DAI_DEBASSERT( _vs.contains( j ) );
505 DAI_DEBASSERT( i != j );
506 VarSet ij(i, j);
508 T max = 0.0;
509 for( size_t alpha1 = 0; alpha1 < i.states(); alpha1++ )
510 for( size_t alpha2 = 0; alpha2 < i.states(); alpha2++ )
511 if( alpha2 != alpha1 )
512 for( size_t beta1 = 0; beta1 < j.states(); beta1++ )
513 for( size_t beta2 = 0; beta2 < j.states(); beta2++ )
514 if( beta2 != beta1 ) {
515 size_t as = 1, bs = 1;
516 if( i < j )
517 bs = i.states();
518 else
519 as = j.states();
520 T f1 = slice( ij, alpha1 * as + beta1 * bs ).p().divide( slice( ij, alpha2 * as + beta1 * bs ).p() ).max();
521 T f2 = slice( ij, alpha2 * as + beta2 * bs ).p().divide( slice( ij, alpha1 * as + beta2 * bs ).p() ).max();
522 T f = f1 * f2;
523 if( f > max )
524 max = f;
525 }
527 return std::tanh( 0.25 * std::log( max ) );
528 }
531 /// Writes a factor to an output stream
532 /** \relates TFactor
533 */
534 template<typename T> std::ostream& operator<< (std::ostream& os, const TFactor<T>& f) {
535 os << "(" << f.vars() << ", (";
536 for( size_t i = 0; i < f.states(); i++ )
537 os << (i == 0 ? "" : ", ") << f[i];
538 os << "))";
539 return os;
540 }
543 /// Returns distance between two factors \a f and \a g, according to the distance measure \a dt
544 /** \relates TFactor
545 * \pre f.vars() == g.vars()
546 */
547 template<typename T> T dist( const TFactor<T> &f, const TFactor<T> &g, typename TProb<T>::DistType dt ) {
548 if( f.vars().empty() || g.vars().empty() )
549 return -1;
550 else {
551 DAI_DEBASSERT( f.vars() == g.vars() );
552 return dist( f.p(), g.p(), dt );
553 }
554 }
557 /// Returns the pointwise maximum of two factors
558 /** \relates TFactor
559 * \pre f.vars() == g.vars()
560 */
561 template<typename T> TFactor<T> max( const TFactor<T> &f, const TFactor<T> &g ) {
562 DAI_ASSERT( f._vs == g._vs );
563 return TFactor<T>( f._vs, max( f.p(), g.p() ) );
564 }
567 /// Returns the pointwise minimum of two factors
568 /** \relates TFactor
569 * \pre f.vars() == g.vars()
570 */
571 template<typename T> TFactor<T> min( const TFactor<T> &f, const TFactor<T> &g ) {
572 DAI_ASSERT( f._vs == g._vs );
573 return TFactor<T>( f._vs, min( f.p(), g.p() ) );
574 }
577 /// Calculates the mutual information between the two variables that \a f depends on, under the distribution given by \a f
578 /** \relates TFactor
579 * \pre f.vars().size() == 2
580 */
581 template<typename T> T MutualInfo(const TFactor<T> &f) {
582 DAI_ASSERT( f.vars().size() == 2 );
583 VarSet::const_iterator it = f.vars().begin();
584 Var i = *it; it++; Var j = *it;
585 TFactor<T> projection = f.marginal(i) * f.marginal(j);
586 return dist( f.normalized(), projection, TProb<T>::DISTKL );
587 }
590 /// Represents a factor with values of type dai::Real.
591 typedef TFactor<Real> Factor;
594 } // end of namespace dai
597 #endif