Improve documentation:
- evidence.h
emalg.h
doc.h
*/
-/** \page fileformat libDAI factorgraph file format
+/** \page fileformats libDAI file formats
*
- * This page describes the .fg fileformat used in libDAI to store factor graphs.
+ * \section fileformats-factorgraph Factor graph (.fg) file format
+ *
+ * This section describes the .fg file format used in libDAI to store factor graphs.
* Markov Random Fields are special cases of factor graphs, as are Bayesian
* networks. A factor graph can be specified as follows: for each factor, one has
* to specify which variables occur in the factor, what their respective
* values the fastest (similar to MATLAB indexing of multidimensional arrays). An
* example block describing one factor is:
*
- * 3\n
- * 4 8 7\n
- * 3 2 2\n
- * 11\n
- * 0 0.1\n
- * 1 3.5\n
- * 2 2.8\n
- * 3 6.3\n
- * 4 8.4\n
- * 6 7.4\n
- * 7 2.4\n
- * 8 8.9\n
- * 9 1.3\n
- * 10 1.6\n
- * 12 6.4\n
- * 11 2.6\n
+ * <pre>
+ * 3
+ * 4 8 7
+ * 3 2 2
+ * 11
+ * 0 0.1
+ * 1 3.5
+ * 2 2.8
+ * 3 6.3
+ * 4 8.4
+ * 6 7.4
+ * 7 2.4
+ * 8 8.9
+ * 9 1.3
+ * 10 1.6
+ * 12 6.4
+ * 11 2.6
+ * </pre>
*
* which corresponds to the following factor:
*
* \end{array}
* \f]
*
- * Note that the value of x_4 changes fastest, followed by that of x_8, and x_7
+ * Note that the value of \f$x_4\f$ changes fastest, followed by that of \f$x_8\f$, and \f$x_7\f$
* varies the slowest, corresponding to the second line of the block ("4 8 7").
- * Further, x_4 can take on three values, and x_8 and x_7 each have two possible
+ * Further, \f$x_4\f$ can take on three values, and \f$x_8\f$ and \f$x_7\f$ each have two possible
* values, as described in the third line of the block ("3 2 2"). The table
* contains 11 non-zero entries (all except for the fifth entry). Note that the
* eleventh and twelfth entries are interchanged.
*
* A final note: the internal representation in libDAI of the factor above is
* different, because the variables are ordered according to their indices
- * (i.e., the ordering would be x_4 x_7 x_8) and the values of the table are
+ * (i.e., the ordering would be \f$x_4 x_7 x_8\f$) and the values of the table are
* stored accordingly, with the variable having the smallest index changing
* fastest:
*
* 2 & 1 & 1 & 2.6
* \end{array}
* \f]
+ *
+ *
+ * \section fileformats-evidence Evidence (.tab) file format
+ *
+ * This section describes the .tab file format used in libDAI to store "evidence",
+ * i.e., a data set consisting of multiple samples, where each sample is the
+ * observed joint state of some variables.
+ *
+ * A .tab file is a tabular data file, consisting of a header line followed by
+ * one line for each data sample. Each line should have the same number of columns,
+ * where columns are separated by one tab character. Each column corresponds to
+ * a variable. The header line consists of the variable labels (corresponding to
+ * Var::label()). The other lines are observed joint states of the variables, i.e.,
+ * each line corresponds to a joint observation of the variables, and each column
+ * of a line contains the state of the variable associated with that column.
+ * Missing data is handled simply by having two consecutive tab characters,
+ * without any characters in between.
+ *
+ * \par Example:
+ *
+ * <pre>
+ * 1 3 2
+ * 0 0 1
+ * 1 0 1
+ * 1 1
+ * </pre>
+ *
+ * This would correspond to a data set consisting of three observations concerning
+ * the variables with labels 1, 3 and 2; the first observation being
+ * \f$x_1 = 0, x_3 = 0, x_2 = 1\f$, the second observation being
+ * \f$x_1 = 1, x_3 = 0, x_2 = 1\f$, and the third observation being
+ * \f$x_1 = 1, x_2 = 1\f$ (where the state of \f$x_3\f$ is missing).
*/
-
- /** \page license License
+/** \page license License
<b>libDAI is licensed under the GNU General Public License version 2, or
(at your option) any later version. The complete license text is
/// \file
-/** \brief Defines classes Evidence and Observation
- * \todo Describe tabular data file format
- * \todo Improve documentation
- * \author Charles Vaske
- */
+/// \brief Defines class Evidence, which stores multiple observations of joint states of variables
#ifndef __defined_libdai_evidence_h
namespace dai {
-/// Stores observed values of a subset of variables
-/** \author Charles Vaske
- */
-class Observation {
- private:
- /// Used to store the state of some variables
- std::map<Var, size_t> _obs;
-
- public:
- /// Default constructor
- Observation() : _obs() {}
-
- /// Get all observations
- const std::map<Var, size_t>& observations() const { return _obs; }
-
- /// Add an observation
- void addObservation( Var node, size_t setting );
-
- /// Clamp variables in the graphical model to their observed values
- void applyEvidence( InfAlg& alg ) const;
-};
+/// Stores joint state of a set of variables
+typedef std::map<Var, size_t> Observation;
-/// Stores multiple joint observations of sets of variables.
-/** The Evidence class stores multiple samples, where each sample is the joint
- * observation of the states of some variables.
+/// Stores a data set consisting of multiple samples, where each sample is the observed joint state of some variables.
+/** \note Each sample can describe the joint state of a different set of variables,
+ * in order to be able to deal with missing data.
*
* \author Charles Vaske
*/
class Evidence {
private:
- /// Each sample is the joint observation of the states of some variables
+ /// Each sample is an observed joint state of some variables
std::vector<Observation> _samples;
public:
/// Default constructor
Evidence() : _samples() {}
- /// Construct from existing samples
+ /// Construct from \a samples
Evidence( std::vector<Observation> &samples ) : _samples(samples) {}
- /// Read in tabular data from a stream.
- /** Each line contains one sample, and the first line is a header line with names.
- */
- void addEvidenceTabFile( std::istream& is, std::map<std::string, Var> &varMap );
-
- /// Read in tabular data from a stream.
- /** Each line contains one sample, and the first line is a header line with
- * variable labels which should correspond with a subset of the variables in fg.
+ /// Read in tabular data from a stream and add the read samples to \c *this.
+ /** \param is Input stream in .tab file format, describing joint observations of variables in \a fg
+ * \param fg Factor graph describing the corresponding variables
+ * \see \ref fileformats-evidence
*/
void addEvidenceTabFile( std::istream& is, FactorGraph& fg );
/// \name Iterator interface
//@{
- /// Iterator over the elements
+ /// Iterator over the samples
typedef std::vector<Observation>::iterator iterator;
- /// Constant iterator over the elements
+ /// Constant iterator over the samples
typedef std::vector<Observation>::const_iterator const_iterator;
- /// Returns iterator that points to the first element
+ /// Returns iterator that points to the first sample
iterator begin() { return _samples.begin(); }
- /// Returns constant iterator that points to the first element
+ /// Returns constant iterator that points to the first sample
const_iterator begin() const { return _samples.begin(); }
- /// Returns iterator that points beyond the last element
+ /// Returns iterator that points beyond the last sample
iterator end() { return _samples.end(); }
- /// Returns constant iterator that points beyond the last element
+ /// Returns constant iterator that points beyond the last sample
const_iterator end() const { return _samples.end(); }
//@}
+
+ private:
+ /// Read in tabular data from a stream and add the read samples to \c *this.
+ void addEvidenceTabFile( std::istream& is, std::map<std::string, Var> &varMap );
};
/// \name Input/Output
//@{
/// Reads a factor graph from a file
- /** \see \ref fileformat
+ /** \see \ref fileformats-factorgraph
*/
void ReadFromFile( const char *filename );
/// Writes a factor graph to a file
- /** \see \ref fileformat
+ /** \see \ref fileformats-factorgraph
*/
void WriteToFile( const char *filename, size_t precision=15 ) const;
/// Writes a factor graph to an output stream
- /** \see \ref fileformat
+ /** \see \ref fileformats-factorgraph
*/
friend std::ostream& operator<< (std::ostream &os, const FactorGraph &fg );
/// Reads a factor graph from an input stream
- /** \see \ref fileformat
+ /** \see \ref fileformats-factorgraph
*/
friend std::istream& operator>> (std::istream &is, FactorGraph &fg );
// Expectation calculation
for( Evidence::const_iterator e = _evidence.begin(); e != _evidence.end(); ++e ) {
InfAlg* clamped = _estep.clone();
- e->applyEvidence( *clamped );
+ // Apply evidence
+ for( Observation::const_iterator i = e->begin(); i != e->end(); ++i )
+ clamped->clamp( clamped->fg().findVar(i->first), i->second );
clamped->init();
clamped->run();
namespace dai {
-void Observation::addObservation( Var node, size_t setting ) {
- _obs[node] = setting;
-}
-
-
-void Observation::applyEvidence( InfAlg &alg ) const {
- for( std::map<Var, size_t>::const_iterator i = _obs.begin(); i != _obs.end(); ++i )
- alg.clamp( alg.fg().findVar(i->first), i->second );
-}
-
-
void Evidence::addEvidenceTabFile( std::istream &is, FactorGraph &fg ) {
std::map<std::string, Var> varMap;
for( std::vector<Var>::const_iterator v = fg.vars().begin(); v != fg.vars().end(); ++v ) {
size_t state = atoi( fields[i].c_str() );
if( state >= vars[i].states() )
DAI_THROW(INVALID_EVIDENCE_FILE);
- sampleData.addObservation( vars[i], state );
+ sampleData[vars[i]] = state;
}
}
_samples.push_back( sampleData );
cout << "Number of samples: " << e.nrSamples() << endl;
for( Evidence::iterator ps = e.begin(); ps != e.end(); ps++ )
- cout << "Sample #" << (ps - e.begin()) << " has " << ps->observations().size() << " observations." << endl;
+ cout << "Sample #" << (ps - e.begin()) << " has " << ps->size() << " observations." << endl;
ifstream emstream( argv[3] );
EMAlg em(e, *inf, emstream);