Added functionality for reading files in the UAI approximate inference challenge...
[libdai.git] / src / io.cpp
1 /* This file is part of libDAI - http://www.libdai.org/
2 *
3 * libDAI is licensed under the terms of the GNU General Public License version
4 * 2, or (at your option) any later version. libDAI is distributed without any
5 * warranty. See the file COPYING for more details.
6 *
7 * Copyright (C) 2008-2010 Joris Mooij [joris dot mooij at libdai dot org]
8 */
9
10
11 #include <dai/io.h>
12 #include <dai/alldai.h>
13 #include <iostream>
14 #include <fstream>
15
16
17 namespace dai {
18
19
20 using namespace std;
21
22
23 void ReadUaiAieFactorGraphFile( const char *filename, size_t verbose, std::vector<Var>& vars, std::vector<Factor>& factors, std::vector<Permute>& permutations ) {
24 vars.clear();
25 factors.clear();
26 permutations.clear();
27
28 // open file
29 ifstream is;
30 is.open( filename );
31 if( is.is_open() ) {
32 size_t nrFacs, nrVars;
33 string line;
34
35 // read header line
36 getline(is,line);
37 if( is.fail() || line.size() == 0 )
38 DAI_THROWE(INVALID_FACTORGRAPH_FILE,"UAI factor graph file should start with nonempty header line");
39 if( line[line.size() - 1] == '\r' )
40 line.resize( line.size() - 1 ); // for DOS text files
41 if( line != "BAYES" && line != "MARKOV" )
42 DAI_THROWE(INVALID_FACTORGRAPH_FILE,"UAI factor graph file should start with \"BAYES\" or \"MARKOV\"");
43 if( verbose >= 2 )
44 cout << "Reading " << line << " network..." << endl;
45
46 // read number of variables
47 is >> nrVars;
48 if( is.fail() )
49 DAI_THROWE(INVALID_FACTORGRAPH_FILE,"Cannot read number of variables");
50 if( verbose >= 2 )
51 cout << "Reading " << nrVars << " variables..." << endl;
52
53 // for each variable, read its number of states
54 vars.reserve( nrVars );
55 for( size_t i = 0; i < nrVars; i++ ) {
56 size_t dim;
57 is >> dim;
58 if( is.fail() )
59 DAI_THROWE(INVALID_FACTORGRAPH_FILE,"Cannot read number of states of " + toString(i) + "'th variable");
60 vars.push_back( Var( i, dim ) );
61 }
62
63 // read number of factors
64 is >> nrFacs;
65 if( is.fail() )
66 DAI_THROWE(INVALID_FACTORGRAPH_FILE,"Cannot read number of factors");
67 if( verbose >= 2 )
68 cout << "Reading " << nrFacs << " factors..." << endl;
69
70 // for each factor, read the variables on which it depends
71 vector<vector<Var> > factorVars;
72 factors.reserve( nrFacs );
73 factorVars.reserve( nrFacs );
74 for( size_t I = 0; I < nrFacs; I++ ) {
75 if( verbose >= 3 )
76 cout << "Reading factor " << I << "..." << endl;
77
78 // read number of variables for factor I
79 size_t I_nrVars;
80 is >> I_nrVars;
81 if( is.fail() )
82 DAI_THROWE(INVALID_FACTORGRAPH_FILE,"Cannot read number of variables for " + toString(I) + "'th factor");
83 if( verbose >= 3 )
84 cout << " which depends on " << I_nrVars << " variables" << endl;
85
86 // read the variable labels
87 vector<long> I_labels;
88 vector<size_t> I_dims;
89 I_labels.reserve( I_nrVars );
90 I_dims.reserve( I_nrVars );
91 factorVars[I].reserve( I_nrVars );
92 for( size_t _i = 0; _i < I_nrVars; _i++ ) {
93 long label;
94 is >> label;
95 if( is.fail() )
96 DAI_THROWE(INVALID_FACTORGRAPH_FILE,"Cannot read variable labels for " + toString(I) + "'th factor");
97 I_labels.push_back( label );
98 I_dims.push_back( vars[label].states() );
99 factorVars[I].push_back( vars[label] );
100 }
101 if( verbose >= 3 )
102 cout << " labels: " << I_labels << ", dimensions " << I_dims << endl;
103
104 // add the factor and the labels
105 factors.push_back( Factor( VarSet( factorVars[I].begin(), factorVars[I].end(), factorVars[I].size() ), (Real)0 ) );
106 }
107
108 // for each factor, read its values
109 permutations.reserve( nrFacs );
110 for( size_t I = 0; I < nrFacs; I++ ) {
111 if( verbose >= 3 )
112 cout << "Reading factor " << I << "..." << endl;
113
114 // calculate permutation object, reversing the indexing in factorVars[I] first
115 Permute permindex( factorVars[I], true );
116 permutations.push_back( permindex );
117
118 // read factor values
119 size_t nrNonZeros;
120 is >> nrNonZeros;
121 if( is.fail() )
122 DAI_THROWE(INVALID_FACTORGRAPH_FILE,"Cannot read number of nonzero factor values for " + toString(I) + "'th factor");
123 if( verbose >= 3 )
124 cout << " number of nonzero values: " << nrNonZeros << endl;
125 DAI_ASSERT( nrNonZeros == factors[I].nrStates() );
126 for( size_t li = 0; li < nrNonZeros; li++ ) {
127 Real val;
128 is >> val;
129 if( is.fail() )
130 DAI_THROWE(INVALID_FACTORGRAPH_FILE,"Cannot read factor values of " + toString(I) + "'th factor");
131 // assign value after calculating its linear index corresponding to the permutation
132 if( verbose >= 4 )
133 cout << " " << li << "'th value " << val << " corresponds with index " << permindex.convertLinearIndex(li) << endl;
134 factors[I].set( permindex.convertLinearIndex( li ), val );
135 }
136 }
137 if( verbose >= 3 )
138 cout << "variables:" << vars << endl;
139 if( verbose >= 3 )
140 cout << "factors:" << factors << endl;
141
142 // close file
143 is.close();
144 } else
145 DAI_THROWE(CANNOT_READ_FILE,"Cannot read from file " + std::string(filename));
146 }
147
148
149 std::vector<std::map<size_t, size_t> > ReadUaiAieEvidenceFile( const char* filename, size_t verbose ) {
150 vector<map<size_t, size_t> > evid;
151
152 // open file
153 ifstream is;
154 string line;
155 is.open( filename );
156 if( is.is_open() ) {
157 // read number of lines
158 getline( is, line );
159 if( is.fail() || line.size() == 0 )
160 DAI_THROWE(INVALID_EVIDENCE_FILE,"Cannot read header line of evidence file");
161 if( line[line.size() - 1] == '\r' )
162 line.resize( line.size() - 1 ); // for DOS text files
163 size_t nrLines = fromString<size_t>( line );
164 if( verbose >= 2 )
165 cout << "Reading " << nrLines << " evidence file lines..." << endl;
166
167 if( nrLines ) {
168 // detect version (pre-2010 or 2010)
169 streampos pos = is.tellg();
170 getline( is, line );
171 if( is.fail() || line.size() == 0 )
172 DAI_THROWE(INVALID_EVIDENCE_FILE,"Cannot read second line of evidence file");
173 if( line[line.size() - 1] == '\r' )
174 line.resize( line.size() - 1 ); // for DOS text files
175 vector<string> cols;
176 cols = tokenizeString( line, false, " \t" );
177 bool oldVersion = true;
178 if( cols.size() % 2 )
179 oldVersion = false;
180 if( verbose >= 2 ) {
181 if( oldVersion )
182 cout << "Detected old (2006, 2008) evidence file format" << endl;
183 else
184 cout << "Detected new (2010) evidence file format" << endl;
185 }
186 size_t nrEvid;
187 if( oldVersion ) {
188 nrEvid = 1;
189 is.seekg( 0 );
190 } else {
191 nrEvid = nrLines;
192 is.seekg( pos );
193 }
194
195 // read all evidence cases
196 if( verbose >= 2 )
197 cout << "Reading " << nrEvid << " evidence cases..." << endl;
198 evid.resize( nrEvid );
199 for( size_t i = 0; i < nrEvid; i++ ) {
200 // read number of variables
201 size_t nrObs;
202 is >> nrObs;
203 if( is.fail() )
204 DAI_THROWE(INVALID_EVIDENCE_FILE,"Evidence case " + toString(i) + ": Cannot read number of observations");
205 if( verbose >= 2 )
206 cout << "Evidence case " << i << ": reading " << nrObs << " observations..." << endl;
207
208 // for each observation, read the variable label and the observed value
209 for( size_t j = 0; j < nrObs; j++ ) {
210 size_t label, val;
211 is >> label;
212 if( is.fail() )
213 DAI_THROWE(INVALID_EVIDENCE_FILE,"Evidence case " + toString(i) + ": Cannot read label for " + toString(j) + "'th observed variable");
214 is >> val;
215 if( is.fail() )
216 DAI_THROWE(INVALID_EVIDENCE_FILE,"Evidence case " + toString(i) + ": Cannot read value of " + toString(j) + "'th observed variable");
217 if( verbose >= 3 )
218 cout << " variable: " << label << ", value: " << val << endl;
219 evid[i][label] = val;
220 }
221 }
222 }
223
224 // close file
225 is.close();
226 } else
227 DAI_THROWE(CANNOT_READ_FILE,"Cannot read from file " + std::string(filename));
228
229 if( evid.size() == 0 )
230 evid.resize( 1 );
231
232 return evid;
233 }
234
235
236 } // end of namespace dai