[Frederik Eaton] Change cout to cerr in warnings and error messages
[libdai.git] / src / mr.cpp
1 /* Copyright (C) 2006-2008 Joris Mooij [joris dot mooij at tuebingen dot mpg dot de]
2 Radboud University Nijmegen, The Netherlands /
3 Max Planck Institute for Biological Cybernetics, Germany
4
5 Copyright (C) 2007 Bastian Wemmenhove
6 Radboud University Nijmegen, The Netherlands
7
8 This file is part of libDAI.
9
10 libDAI is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2 of the License, or
13 (at your option) any later version.
14
15 libDAI is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with libDAI; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25
26 #include <cstdio>
27 #include <ctime>
28 #include <cmath>
29 #include <cstdlib>
30 #include <dai/mr.h>
31 #include <dai/bp.h>
32 #include <dai/jtree.h>
33 #include <dai/util.h>
34
35
36 namespace dai {
37
38
39 using namespace std;
40
41
42 const char *MR::Name = "MR";
43
44
45 void MR::setProperties( const PropertySet &opts ) {
46 assert( opts.hasKey("tol") );
47 assert( opts.hasKey("verbose") );
48 assert( opts.hasKey("updates") );
49 assert( opts.hasKey("inits") );
50
51 props.tol = opts.getStringAs<double>("tol");
52 props.verbose = opts.getStringAs<size_t>("verbose");
53 props.updates = opts.getStringAs<Properties::UpdateType>("updates");
54 props.inits = opts.getStringAs<Properties::InitType>("inits");
55 }
56
57
58 PropertySet MR::getProperties() const {
59 PropertySet opts;
60 opts.Set( "tol", props.tol );
61 opts.Set( "verbose", props.verbose );
62 opts.Set( "updates", props.updates );
63 opts.Set( "inits", props.inits );
64 return opts;
65 }
66
67
68 string MR::printProperties() const {
69 stringstream s( stringstream::out );
70 s << "[";
71 s << "tol=" << props.tol << ",";
72 s << "verbose=" << props.verbose << ",";
73 s << "updates=" << props.updates << ",";
74 s << "inits=" << props.inits << "]";
75 return s.str();
76 }
77
78
79 // init N, con, nb, tJ, theta
80 void MR::init(size_t Nin, double *_w, double *_th) {
81 size_t i,j;
82
83 N = Nin;
84
85 con.resize(N);
86 nb.resize(N);
87 tJ.resize(N);
88 for(i=0; i<N; i++ ) {
89 nb[i].resize(kmax);
90 tJ[i].resize(kmax);
91 con[i]=0;
92 for(j=0; j<N; j++ )
93 if( _w[i*N+j] != 0.0 ) {
94 nb[i][con[i]] = j;
95 tJ[i][con[i]] = tanh(_w[i*N+j]);
96 con[i]++;
97 }
98 }
99
100 theta.resize(N);
101 for(i=0; i<N; i++)
102 theta[i] = _th[i];
103 }
104
105
106 // calculate cors
107 double MR::init_cor_resp() {
108 size_t j,k,l, runx,i2;
109 double variab1, variab2;
110 double md, maxdev;
111 double thbJsite[kmax];
112 double xinter;
113 double rinter;
114 double res[kmax];
115 size_t s2;
116 size_t flag;
117 size_t concav;
118 size_t runs = 3000;
119 double eps = 0.2;
120 size_t cavity;
121
122 vector<vector<double> > tJ_org;
123 vector<vector<size_t> > nb_org;
124 vector<size_t> con_org;
125 vector<double> theta_org;
126
127 vector<double> xfield(N*kmax,0.0);
128 vector<double> rfield(N*kmax,0.0);
129 vector<double> Hfield(N,0.0);
130 vector<double> devs(N*kmax,0.0);
131 vector<double> devs2(N*kmax,0.0);
132 vector<double> dev(N,0.0);
133 vector<double> avmag(N,0.0);
134
135 // save original tJ, nb
136 nb_org = nb;
137 tJ_org = tJ;
138 con_org = con;
139 theta_org = theta;
140
141 maxdev = 0.0;
142 for(cavity=0; cavity<N; cavity++){ // for each spin to be removed
143 con = con_org;
144 concav=con[cavity];
145
146 nb = nb_org;
147 tJ = tJ_org;
148
149 // Adapt the graph variables nb[], tJ[] and con[]
150 for(size_t i=0; i<con[cavity]; i++) {
151 size_t ij = nb[cavity][i];
152 flag=0;
153 j=0;
154 do{
155 if(nb[ij][j]==cavity){
156 while(j<(con[ij]-1)){
157 nb[ij][j]=nb[ij][j+1];
158 tJ[ij][j] = tJ[ij][j+1];
159 j++;
160 }
161 flag=1;
162 }
163 j++;
164 } while(flag==0);
165 }
166 for(size_t i=0; i<con[cavity]; i++)
167 con[nb[cavity][i]]--;
168 con[cavity] = 0;
169
170
171 // Do everything starting from the new graph********
172
173 makekindex();
174 theta = theta_org;
175
176 for(size_t i=0; i<kmax*N; i++)
177 xfield[i] = 3.0*(2*rnd_uniform()-1.);
178
179 for(i2=0; i2<concav; i2++){ // Subsequently apply a field to each cavity spin ****************
180
181 s2 = nb[cavity][i2]; // identify the index of the cavity spin
182 for(size_t i=0; i<con[s2]; i++)
183 rfield[kmax*s2+i] = 1.;
184
185 runx=0;
186 do { // From here start the response and belief propagation
187 runx++;
188 md=0.0;
189 for(k=0; k<N; k++){
190 if(k!=cavity) {
191 for(size_t i=0; i<con[k]; i++)
192 thbJsite[i] = tJ[k][i];
193 for(l=0; l<con[k]; l++){
194 xinter = 1.;
195 rinter = 0.;
196 if(k==s2) rinter += 1.;
197 for(j=0; j<con[k]; j++)
198 if(j!=l){
199 variab2 = tanh(xfield[kmax*nb[k][j]+kindex[k][j]]);
200 variab1 = thbJsite[j]*variab2;
201 xinter *= (1.+variab1)/(1.-variab1);
202
203 rinter += thbJsite[j]*rfield[kmax*nb[k][j]+kindex[k][j]]*(1-variab2*variab2)/(1-variab1*variab1);
204 }
205
206 variab1 = 0.5*log(xinter);
207 xinter = variab1 + theta[k];
208 devs[kmax*k+l] = xinter-xfield[kmax*k+l];
209 xfield[kmax*k+l] = xfield[kmax*k+l]+devs[kmax*k+l]*eps;
210 if( fabs(devs[kmax*k+l]) > md )
211 md = fabs(devs[kmax*k+l]);
212
213 devs2[kmax*k+l] = rinter-rfield[kmax*k+l];
214 rfield[kmax*k+l] = rfield[kmax*k+l]+devs2[kmax*k+l]*eps;
215 if( fabs(devs2[kmax*k+l]) > md )
216 md = fabs(devs2[kmax*k+l]);
217 }
218 }
219 }
220 } while((md > props.tol)&&(runx<runs)); // Precision condition reached -> BP and RP finished
221 if(runx==runs)
222 if( props.verbose >= 2 )
223 cerr << "init_cor_resp: Convergence not reached (md=" << md << ")..." << endl;
224 if(md > maxdev)
225 maxdev = md;
226
227 // compute the observables (i.e. magnetizations and responses)******
228
229 for(size_t i=0; i<concav; i++){
230 rinter = 0.;
231 xinter = 1.;
232 if(i!=i2)
233 for(j=0; j<con[nb[cavity][i]]; j++){
234 variab2 = tanh(xfield[kmax*nb[nb[cavity][i]][j]+kindex[nb[cavity][i]][j]]);
235 variab1 = tJ[nb[cavity][i]][j]*variab2;
236 rinter += tJ[nb[cavity][i]][j]*rfield[kmax*nb[nb[cavity][i]][j]+kindex[nb[cavity][i]][j]]*(1-variab2*variab2)/(1-variab1*variab1);
237 xinter *= (1.+variab1)/(1.-variab1);
238 }
239 xinter = tanh(0.5*log(xinter)+theta[nb[cavity][i]]);
240 res[i] = rinter*(1-xinter*xinter);
241 }
242
243 // *******************
244
245 for(size_t i=0; i<concav; i++)
246 if(nb[cavity][i]!=s2)
247 // if(i!=i2)
248 cors[cavity][i2][i] = res[i];
249 else
250 cors[cavity][i2][i] = 0;
251 } // close for i2 = 0...concav
252 }
253
254 // restore nb, tJ, con
255 tJ = tJ_org;
256 nb = nb_org;
257 con = con_org;
258 theta = theta_org;
259
260 return maxdev;
261 }
262
263
264 double MR::T(size_t i, sub_nb A) {
265 // i is a variable index
266 // A is a subset of nb[i]
267 //
268 // calculate T{(i)}_A as defined in Rizzo&Montanari e-print (2.17)
269
270 sub_nb _nbi_min_A(con[i]);
271 _nbi_min_A.set();
272 _nbi_min_A &= ~A;
273
274 double res = theta[i];
275 for( size_t _j = 0; _j < _nbi_min_A.size(); _j++ )
276 if( _nbi_min_A.test(_j) )
277 res += atanh(tJ[i][_j] * M[i][_j]);
278 return tanh(res);
279 }
280
281
282 double MR::T(size_t i, size_t _j) {
283 sub_nb j(con[i]);
284 j.set(_j);
285 return T(i,j);
286 }
287
288
289 double MR::Omega(size_t i, size_t _j, size_t _l) {
290 sub_nb jl(con[i]);
291 jl.set(_j);
292 jl.set(_l);
293 double Tijl = T(i,jl);
294 return Tijl / (1.0 + tJ[i][_l] * M[i][_l] * Tijl);
295 }
296
297
298 double MR::Gamma(size_t i, size_t _j, size_t _l1, size_t _l2) {
299 sub_nb jll(con[i]);
300 jll.set(_j);
301 double Tij = T(i,jll);
302 jll.set(_l1);
303 jll.set(_l2);
304 double Tijll = T(i,jll);
305
306 return (Tijll - Tij) / (1.0 + tJ[i][_l1] * tJ[i][_l2] * M[i][_l1] * M[i][_l2] + tJ[i][_l1] * M[i][_l1] * Tijll + tJ[i][_l2] * M[i][_l2] * Tijll);
307 }
308
309
310 double MR::Gamma(size_t i, size_t _l1, size_t _l2) {
311 sub_nb ll(con[i]);
312 double Ti = T(i,ll);
313 ll.set(_l1);
314 ll.set(_l2);
315 double Till = T(i,ll);
316
317 return (Till - Ti) / (1.0 + tJ[i][_l1] * tJ[i][_l2] * M[i][_l1] * M[i][_l2] + tJ[i][_l1] * M[i][_l1] * Till + tJ[i][_l2] * M[i][_l2] * Till);
318 }
319
320
321 double MR::_tJ(size_t i, sub_nb A) {
322 // i is a variable index
323 // A is a subset of nb[i]
324 //
325 // calculate the product of all tJ[i][_j] for _j in A
326
327 sub_nb::size_type _j = A.find_first();
328 if( _j == sub_nb::npos )
329 return 1.0;
330 else
331 return tJ[i][_j] * _tJ(i, A.reset(_j));
332 }
333
334
335 double MR::appM(size_t i, sub_nb A) {
336 // i is a variable index
337 // A is a subset of nb[i]
338 //
339 // calculate the moment of variables in A from M and cors, neglecting higher order cumulants,
340 // defined as the sum over all partitions of A into subsets of cardinality two at most of the
341 // product of the cumulants (either first order, i.e. M, or second order, i.e. cors) of the
342 // entries of the partitions
343
344 sub_nb::size_type _j = A.find_first();
345 if( _j == sub_nb::npos )
346 return 1.0;
347 else {
348 sub_nb A_j(A); A_j.reset(_j);
349
350 double result = M[i][_j] * appM(i, A_j);
351 for( size_t _k = 0; _k < A_j.size(); _k++ )
352 if( A_j.test(_k) ) {
353 sub_nb A_jk(A_j); A_jk.reset(_k);
354 result += cors[i][_j][_k] * appM(i,A_jk);
355 }
356
357 return result;
358 }
359 }
360
361
362 void MR::sum_subs(size_t j, sub_nb A, double *sum_even, double *sum_odd) {
363 // j is a variable index
364 // A is a subset of nb[j]
365
366 // calculate sum over all even/odd subsets B of A of _tJ(j,B) appM(j,B)
367
368 *sum_even = 0.0;
369 *sum_odd = 0.0;
370
371 sub_nb B(A.size());
372 do {
373 if( B.count() % 2 )
374 *sum_odd += _tJ(j,B) * appM(j,B);
375 else
376 *sum_even += _tJ(j,B) * appM(j,B);
377
378 // calc next subset B
379 size_t bit = 0;
380 for( ; bit < A.size(); bit++ )
381 if( A.test(bit) ) {
382 if( B.test(bit) )
383 B.reset(bit);
384 else {
385 B.set(bit);
386 break;
387 }
388 }
389 } while (!B.none());
390 }
391
392
393 void MR::solvemcav() {
394 double sum_even, sum_odd;
395 double maxdev;
396 size_t maxruns = 1000;
397
398 makekindex();
399 for(size_t i=0; i<N; i++)
400 for(size_t _j=0; _j<con[i]; _j++)
401 M[i][_j]=0.1;
402
403 size_t run=0;
404 do {
405 maxdev=0.0;
406 run++;
407 for(size_t i=0; i<N; i++){ // for all i
408 for(size_t _j=0; _j<con[i]; _j++){ // for all j in N_i
409 size_t _i = kindex[i][_j];
410 size_t j = nb[i][_j];
411 assert( nb[j][_i] == i );
412
413 double newM = 0.0;
414 if( props.updates == Properties::UpdateType::FULL ) {
415 // find indices in nb[j] that do not correspond with i
416 sub_nb _nbj_min_i(con[j]);
417 _nbj_min_i.set();
418 _nbj_min_i.reset(kindex[i][_j]);
419
420 // find indices in nb[i] that do not correspond with j
421 sub_nb _nbi_min_j(con[i]);
422 _nbi_min_j.set();
423 _nbi_min_j.reset(_j);
424
425 sum_subs(j, _nbj_min_i, &sum_even, &sum_odd);
426 newM = (tanh(theta[j]) * sum_even + sum_odd) / (sum_even + tanh(theta[j]) * sum_odd);
427
428 sum_subs(i, _nbi_min_j, &sum_even, &sum_odd);
429 double denom = sum_even + tanh(theta[i]) * sum_odd;
430 double numer = 0.0;
431 for(size_t _k=0; _k<con[i]; _k++) if(_k != _j) {
432 sub_nb _nbi_min_jk(_nbi_min_j);
433 _nbi_min_jk.reset(_k);
434 sum_subs(i, _nbi_min_jk, &sum_even, &sum_odd);
435 numer += tJ[i][_k] * cors[i][_j][_k] * (tanh(theta[i]) * sum_even + sum_odd);
436 }
437 newM -= numer / denom;
438 } else if( props.updates == Properties::UpdateType::LINEAR ) {
439 newM = T(j,_i);
440 for(size_t _l=0; _l<con[i]; _l++) if( _l != _j )
441 newM -= Omega(i,_j,_l) * tJ[i][_l] * cors[i][_j][_l];
442 for(size_t _l1=0; _l1<con[j]; _l1++) if( _l1 != _i )
443 for( size_t _l2=_l1+1; _l2<con[j]; _l2++) if( _l2 != _i)
444 newM += Gamma(j,_i,_l1,_l2) * tJ[j][_l1] * tJ[j][_l2] * cors[j][_l1][_l2];
445 }
446
447 double dev = newM - M[i][_j];
448 // dev *= 0.02;
449 if( fabs(dev) >= maxdev )
450 maxdev = fabs(dev);
451
452 newM = M[i][_j] + dev;
453 if( fabs(newM) > 1.0 )
454 newM = sign(newM);
455 M[i][_j] = newM;
456 }
457 }
458 } while((maxdev>props.tol)&&(run<maxruns));
459
460 _iters = run;
461 if( maxdev > _maxdiff )
462 _maxdiff = maxdev;
463
464 if(run==maxruns){
465 if( props.verbose >= 1 )
466 cerr << "solve_mcav: Convergence not reached (maxdev=" << maxdev << ")..." << endl;
467 }
468 }
469
470
471 void MR::solveM() {
472 for(size_t i=0; i<N; i++) {
473 if( props.updates == Properties::UpdateType::FULL ) {
474 // find indices in nb[i]
475 sub_nb _nbi(con[i]);
476 _nbi.set();
477
478 // calc numerator1 and denominator1
479 double sum_even, sum_odd;
480 sum_subs(i, _nbi, &sum_even, &sum_odd);
481
482 Mag[i] = (tanh(theta[i]) * sum_even + sum_odd) / (sum_even + tanh(theta[i]) * sum_odd);
483
484 } else if( props.updates == Properties::UpdateType::LINEAR ) {
485 sub_nb empty(con[i]);
486 Mag[i] = T(i,empty);
487
488 for(size_t _l1=0; _l1<con[i]; _l1++)
489 for( size_t _l2=_l1+1; _l2<con[i]; _l2++)
490 Mag[i] += Gamma(i,_l1,_l2) * tJ[i][_l1] * tJ[i][_l2] * cors[i][_l1][_l2];
491 }
492 if(fabs(Mag[i])>1.)
493 Mag[i] = sign(Mag[i]);
494 }
495 }
496
497
498 void MR::init_cor() {
499 for( size_t i = 0; i < nrVars(); i++ ) {
500 vector<Factor> pairq;
501 if( props.inits == Properties::InitType::CLAMPING ) {
502 BP bpcav(*this, PropertySet()("updates", string("SEQMAX"))("tol", string("1e-9"))("maxiter", string("10000UL"))("verbose", string("0UL"))("logdomain", string("0")));
503 bpcav.makeCavity( i );
504 pairq = calcPairBeliefs( bpcav, delta(i), false );
505 } else if( props.inits == Properties::InitType::EXACT ) {
506 JTree jtcav(*this, PropertySet()("updates", string("HUGIN"))("verbose", string("0UL")) );
507 jtcav.makeCavity( i );
508 pairq = calcPairBeliefs( jtcav, delta(i), false );
509 }
510 for( size_t jk = 0; jk < pairq.size(); jk++ ) {
511 VarSet::const_iterator kit = pairq[jk].vars().begin();
512 size_t j = findVar( *(kit) );
513 size_t k = findVar( *(++kit) );
514 pairq[jk].normalize();
515 double cor = (pairq[jk][3] - pairq[jk][2] - pairq[jk][1] + pairq[jk][0]) - (pairq[jk][3] + pairq[jk][2] - pairq[jk][1] - pairq[jk][0]) * (pairq[jk][3] - pairq[jk][2] + pairq[jk][1] - pairq[jk][0]);
516 for( size_t _j = 0; _j < con[i]; _j++ ) if( nb[i][_j] == j )
517 for( size_t _k = 0; _k < con[i]; _k++ ) if( nb[i][_k] == k ) {
518 cors[i][_j][_k] = cor;
519 cors[i][_k][_j] = cor;
520 }
521 }
522 }
523 }
524
525
526 string MR::identify() const {
527 return string(Name) + printProperties();
528 }
529
530
531 double MR::run() {
532 if( supported ) {
533 if( props.verbose >= 1 )
534 cerr << "Starting " << identify() << "...";
535
536 double tic = toc();
537 // Diffs diffs(nrVars(), 1.0);
538
539 M.resize(N);
540 for(size_t i=0; i<N; i++)
541 M[i].resize(kmax);
542
543 cors.resize(N);
544 for(size_t i=0; i<N; i++)
545 cors[i].resize(kmax);
546 for(size_t i=0; i<N; i++)
547 for(size_t j=0; j<kmax; j++)
548 cors[i][j].resize(kmax);
549
550 kindex.resize(N);
551 for(size_t i=0; i<N; i++)
552 kindex[i].resize(kmax);
553
554 if( props.inits == Properties::InitType::RESPPROP ) {
555 double md = init_cor_resp();
556 if( md > _maxdiff )
557 _maxdiff = md;
558 } else if( props.inits == Properties::InitType::EXACT )
559 init_cor(); // FIXME no MaxDiff() calculation
560 else if( props.inits == Properties::InitType::CLAMPING )
561 init_cor(); // FIXME no MaxDiff() calculation
562
563 solvemcav();
564
565 Mag.resize(N);
566 solveM();
567
568 if( props.verbose >= 1 )
569 cerr << Name << " needed " << toc() - tic << " seconds." << endl;
570
571 return 0.0;
572 } else
573 return 1.0;
574 }
575
576
577 void MR::makekindex() {
578 for(size_t i=0; i<N; i++)
579 for(size_t j=0; j<con[i]; j++) {
580 size_t ij = nb[i][j]; // ij is the j'th neighbour of spin i
581 size_t k=0;
582 while( nb[ij][k] != i )
583 k++;
584 kindex[i][j] = k; // the j'th neighbour of spin i has spin i as its k'th neighbour
585 }
586 }
587
588
589 Factor MR::belief( const Var &n ) const {
590 if( supported ) {
591 size_t i = findVar( n );
592
593 Real x[2];
594 x[0] = 0.5 - Mag[i] / 2.0;
595 x[1] = 0.5 + Mag[i] / 2.0;
596
597 return Factor( n, x );
598 } else
599 return Factor();
600 }
601
602
603 vector<Factor> MR::beliefs() const {
604 vector<Factor> result;
605 for( size_t i = 0; i < nrVars(); i++ )
606 result.push_back( belief( var(i) ) );
607 return result;
608 }
609
610
611
612 MR::MR( const FactorGraph &fg, const PropertySet &opts ) : DAIAlgFG(fg), supported(true), _maxdiff(0.0), _iters(0) {
613 setProperties( opts );
614
615 // check whether all vars in fg are binary
616 // check whether connectivity is <= kmax
617 for( size_t i = 0; i < fg.nrVars(); i++ )
618 if( (fg.var(i).states() > 2) || (fg.delta(i).size() > kmax) ) {
619 supported = false;
620 break;
621 }
622
623 if( !supported )
624 return;
625
626 // check whether all interactions are pairwise or single
627 for( size_t I = 0; I < fg.nrFactors(); I++ )
628 if( fg.factor(I).vars().size() > 2 ) {
629 supported = false;
630 break;
631 }
632
633 if( !supported )
634 return;
635
636 // create w and th
637 size_t Nin = fg.nrVars();
638
639 double *w = new double[Nin*Nin];
640 double *th = new double[Nin];
641
642 for( size_t i = 0; i < Nin; i++ ) {
643 th[i] = 0.0;
644 for( size_t j = 0; j < Nin; j++ )
645 w[i*Nin+j] = 0.0;
646 }
647
648 for( size_t I = 0; I < fg.nrFactors(); I++ ) {
649 const Factor &psi = fg.factor(I);
650 if( psi.vars().size() == 1 ) {
651 size_t i = fg.findVar( *(psi.vars().begin()) );
652 th[i] += 0.5 * log(psi[1] / psi[0]);
653 } else if( psi.vars().size() == 2 ) {
654 size_t i = fg.findVar( *(psi.vars().begin()) );
655 VarSet::const_iterator jit = psi.vars().begin();
656 size_t j = fg.findVar( *(++jit) );
657
658 w[i*Nin+j] += 0.25 * log(psi[3] * psi[0] / (psi[2] * psi[1]));
659 w[j*Nin+i] += 0.25 * log(psi[3] * psi[0] / (psi[2] * psi[1]));
660
661 th[i] += 0.25 * log(psi[3] / psi[2] * psi[1] / psi[0]);
662 th[j] += 0.25 * log(psi[3] / psi[1] * psi[2] / psi[0]);
663 }
664 }
665
666 init(Nin, w, th);
667
668 delete th;
669 delete w;
670 }
671
672
673 } // end of namespace dai