Removed stuff from InfAlg, moved it to individual inference algorithms
[libdai.git] / src / mr.cpp
1 /* Copyright (C) 2006-2008 Joris Mooij [j dot mooij at science dot ru dot nl]
2 Radboud University Nijmegen, The Netherlands
3
4 This file is part of libDAI.
5
6 libDAI is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 libDAI is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with libDAI; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21
22 #include <cstdio>
23 #include <ctime>
24 #include <cmath>
25 #include <cstdlib>
26 #include <dai/mr.h>
27 #include <dai/bp.h>
28 #include <dai/jtree.h>
29 #include <dai/util.h>
30 #include <dai/diffs.h>
31
32
33 namespace dai {
34
35
36 using namespace std;
37
38
39 const char *MR::Name = "MR";
40
41
42 void MR::setProperties( const PropertySet &opts ) {
43 assert( opts.hasKey("tol") );
44 assert( opts.hasKey("verbose") );
45 assert( opts.hasKey("updates") );
46 assert( opts.hasKey("inits") );
47
48 props.tol = opts.getStringAs<double>("tol");
49 props.verbose = opts.getStringAs<size_t>("verbose");
50 props.updates = opts.getStringAs<Properties::UpdateType>("updates");
51 props.inits = opts.getStringAs<Properties::InitType>("inits");
52 }
53
54
55 PropertySet MR::getProperties() const {
56 PropertySet opts;
57 opts.Set( "tol", props.tol );
58 opts.Set( "verbose", props.verbose );
59 opts.Set( "updates", props.updates );
60 opts.Set( "inits", props.inits );
61 return opts;
62 }
63
64
65 // init N, con, nb, tJ, theta
66 void MR::init(size_t Nin, double *_w, double *_th) {
67 size_t i,j;
68
69 N = Nin;
70
71 con.resize(N);
72 nb.resize(N);
73 tJ.resize(N);
74 for(i=0; i<N; i++ ) {
75 nb[i].resize(kmax);
76 tJ[i].resize(kmax);
77 con[i]=0;
78 for(j=0; j<N; j++ )
79 if( _w[i*N+j] != 0.0 ) {
80 nb[i][con[i]] = j;
81 tJ[i][con[i]] = tanh(_w[i*N+j]);
82 con[i]++;
83 }
84 }
85
86 theta.resize(N);
87 for(i=0; i<N; i++)
88 theta[i] = _th[i];
89 }
90
91
92 // calculate cors
93 double MR::init_cor_resp() {
94 size_t j,k,l, runx,i2;
95 double variab1, variab2;
96 double md, maxdev;
97 double thbJsite[kmax];
98 double xinter;
99 double rinter;
100 double res[kmax];
101 size_t s2;
102 size_t flag;
103 size_t concav;
104 size_t runs = 3000;
105 double eps = 0.2;
106 size_t cavity;
107
108 vector<vector<double> > tJ_org;
109 vector<vector<size_t> > nb_org;
110 vector<size_t> con_org;
111 vector<double> theta_org;
112
113 vector<double> xfield(N*kmax,0.0);
114 vector<double> rfield(N*kmax,0.0);
115 vector<double> Hfield(N,0.0);
116 vector<double> devs(N*kmax,0.0);
117 vector<double> devs2(N*kmax,0.0);
118 vector<double> dev(N,0.0);
119 vector<double> avmag(N,0.0);
120
121 // save original tJ, nb
122 nb_org = nb;
123 tJ_org = tJ;
124 con_org = con;
125 theta_org = theta;
126
127 maxdev = 0.0;
128 for(cavity=0; cavity<N; cavity++){ // for each spin to be removed
129 con = con_org;
130 concav=con[cavity];
131
132 nb = nb_org;
133 tJ = tJ_org;
134
135 // Adapt the graph variables nb[], tJ[] and con[]
136 for(size_t i=0; i<con[cavity]; i++) {
137 size_t ij = nb[cavity][i];
138 flag=0;
139 j=0;
140 do{
141 if(nb[ij][j]==cavity){
142 while(j<(con[ij]-1)){
143 nb[ij][j]=nb[ij][j+1];
144 tJ[ij][j] = tJ[ij][j+1];
145 j++;
146 }
147 flag=1;
148 }
149 j++;
150 } while(flag==0);
151 }
152 for(size_t i=0; i<con[cavity]; i++)
153 con[nb[cavity][i]]--;
154 con[cavity] = 0;
155
156
157 // Do everything starting from the new graph********
158
159 makekindex();
160 theta = theta_org;
161
162 for(size_t i=0; i<kmax*N; i++)
163 xfield[i] = 3.0*(2*rnd_uniform()-1.);
164
165 for(i2=0; i2<concav; i2++){ // Subsequently apply a field to each cavity spin ****************
166
167 s2 = nb[cavity][i2]; // identify the index of the cavity spin
168 for(size_t i=0; i<con[s2]; i++)
169 rfield[kmax*s2+i] = 1.;
170
171 runx=0;
172 do { // From here start the response and belief propagation
173 runx++;
174 md=0.0;
175 for(k=0; k<N; k++){
176 if(k!=cavity) {
177 for(size_t i=0; i<con[k]; i++)
178 thbJsite[i] = tJ[k][i];
179 for(l=0; l<con[k]; l++){
180 xinter = 1.;
181 rinter = 0.;
182 if(k==s2) rinter += 1.;
183 for(j=0; j<con[k]; j++)
184 if(j!=l){
185 variab2 = tanh(xfield[kmax*nb[k][j]+kindex[k][j]]);
186 variab1 = thbJsite[j]*variab2;
187 xinter *= (1.+variab1)/(1.-variab1);
188
189 rinter += thbJsite[j]*rfield[kmax*nb[k][j]+kindex[k][j]]*(1-variab2*variab2)/(1-variab1*variab1);
190 }
191
192 variab1 = 0.5*log(xinter);
193 xinter = variab1 + theta[k];
194 devs[kmax*k+l] = xinter-xfield[kmax*k+l];
195 xfield[kmax*k+l] = xfield[kmax*k+l]+devs[kmax*k+l]*eps;
196 if( fabs(devs[kmax*k+l]) > md )
197 md = fabs(devs[kmax*k+l]);
198
199 devs2[kmax*k+l] = rinter-rfield[kmax*k+l];
200 rfield[kmax*k+l] = rfield[kmax*k+l]+devs2[kmax*k+l]*eps;
201 if( fabs(devs2[kmax*k+l]) > md )
202 md = fabs(devs2[kmax*k+l]);
203 }
204 }
205 }
206 } while((md > props.tol)&&(runx<runs)); // Precision condition reached -> BP and RP finished
207 if(runx==runs)
208 if( props.verbose >= 2 )
209 cout << "init_cor_resp: Convergence not reached (md=" << md << ")..." << endl;
210 if(md > maxdev)
211 maxdev = md;
212
213 // compute the observables (i.e. magnetizations and responses)******
214
215 for(size_t i=0; i<concav; i++){
216 rinter = 0.;
217 xinter = 1.;
218 if(i!=i2)
219 for(j=0; j<con[nb[cavity][i]]; j++){
220 variab2 = tanh(xfield[kmax*nb[nb[cavity][i]][j]+kindex[nb[cavity][i]][j]]);
221 variab1 = tJ[nb[cavity][i]][j]*variab2;
222 rinter += tJ[nb[cavity][i]][j]*rfield[kmax*nb[nb[cavity][i]][j]+kindex[nb[cavity][i]][j]]*(1-variab2*variab2)/(1-variab1*variab1);
223 xinter *= (1.+variab1)/(1.-variab1);
224 }
225 xinter = tanh(0.5*log(xinter)+theta[nb[cavity][i]]);
226 res[i] = rinter*(1-xinter*xinter);
227 }
228
229 // *******************
230
231 for(size_t i=0; i<concav; i++)
232 if(nb[cavity][i]!=s2)
233 // if(i!=i2)
234 cors[cavity][i2][i] = res[i];
235 else
236 cors[cavity][i2][i] = 0;
237 } // close for i2 = 0...concav
238 }
239
240 // restore nb, tJ, con
241 tJ = tJ_org;
242 nb = nb_org;
243 con = con_org;
244 theta = theta_org;
245
246 return maxdev;
247 }
248
249
250 double MR::T(size_t i, sub_nb A) {
251 // i is a variable index
252 // A is a subset of nb[i]
253 //
254 // calculate T{(i)}_A as defined in Rizzo&Montanari e-print (2.17)
255
256 sub_nb _nbi_min_A(con[i]);
257 for( size_t __j = 0; __j < A.size(); __j++ )
258 _nbi_min_A -= A[__j];
259
260 double res = theta[i];
261 for( size_t __j = 0; __j < _nbi_min_A.size(); __j++ ) {
262 size_t _j = _nbi_min_A[__j];
263 res += atanh(tJ[i][_j] * M[i][_j]);
264 }
265 return tanh(res);
266 }
267
268
269 double MR::T(size_t i, size_t _j) {
270 sub_nb j(con[i]);
271 j.clear();
272 j += _j;
273 return T(i,j);
274 }
275
276
277 double MR::Omega(size_t i, size_t _j, size_t _l) {
278 sub_nb jl(con[i]);
279 jl.clear();
280 jl += _j;
281 jl += _l;
282 double Tijl = T(i,jl);
283 return Tijl / (1.0 + tJ[i][_l] * M[i][_l] * Tijl);
284 }
285
286
287 double MR::Gamma(size_t i, size_t _j, size_t _l1, size_t _l2) {
288 sub_nb jll(con[i]);
289 jll.clear();
290 jll += _j;
291 double Tij = T(i,jll);
292 jll += _l1;
293 jll += _l2;
294 double Tijll = T(i,jll);
295
296 return (Tijll - Tij) / (1.0 + tJ[i][_l1] * tJ[i][_l2] * M[i][_l1] * M[i][_l2] + tJ[i][_l1] * M[i][_l1] * Tijll + tJ[i][_l2] * M[i][_l2] * Tijll);
297 }
298
299
300 double MR::Gamma(size_t i, size_t _l1, size_t _l2) {
301 sub_nb ll(con[i]);
302 ll.clear();
303 double Ti = T(i,ll);
304 ll += _l1;
305 ll += _l2;
306 double Till = T(i,ll);
307
308 return (Till - Ti) / (1.0 + tJ[i][_l1] * tJ[i][_l2] * M[i][_l1] * M[i][_l2] + tJ[i][_l1] * M[i][_l1] * Till + tJ[i][_l2] * M[i][_l2] * Till);
309 }
310
311
312 double MR::_tJ(size_t i, sub_nb A) {
313 // i is a variable index
314 // A is a subset of nb[i]
315 //
316 // calculate the product of all tJ[i][_j] for _j in A
317
318 size_t Asize = A.size();
319 switch( Asize ) {
320 case 0: return 1.0;
321 // case 1: return tJ[i][A[0]];
322 // case 2: return tJ[i][A[0]] * tJ[i][A[1]];
323 // case 3: return tJ[i][A[0]] * tJ[i][A[1]] * tJ[i][A[2]];
324 default:
325 size_t __j = Asize - 1;
326 size_t _j = A[__j];
327 sub_nb A_j = A - _j;
328 return tJ[i][_j] * _tJ(i, A_j);
329 }
330
331 }
332
333
334 double MR::appM(size_t i, sub_nb A) {
335 // i is a variable index
336 // A is a subset of nb[i]
337 //
338 // calculate the moment of variables in A from M and cors, neglecting higher order cumulants,
339 // defined as the sum over all partitions of A into subsets of cardinality two at most of the
340 // product of the cumulants (either first order, i.e. M, or second order, i.e. cors) of the
341 // entries of the partitions
342
343 size_t Asize = A.size();
344 switch( Asize ) {
345 case 0: return 1.0;
346 // case 1: return M[i][A[0]];
347 // case 2: return M[i][A[0]] * M[i][A[1]] + cors[i][A[0]][A[1]];
348 // case 3: return M[i][A[0]] * M[i][A[1]] * M[i][A[2]] + M[i][A[0]] * cors[i][A[1]][A[2]] + M[i][A[1]] * cors[i][A[0]][A[2]] + M[i][A[2]] * cors[i][A[0]][A[1]];
349 default:
350 size_t __j = Asize - 1;
351 size_t _j = A[__j];
352 sub_nb A_j = A - _j;
353
354 double result = M[i][_j] * appM(i, A_j);
355 for( size_t __k = 0; __k < __j; __k++ ) {
356 size_t _k = A[__k];
357 result += cors[i][_j][_k] * appM(i,A_j - _k);
358 }
359
360 return result;
361 }
362 }
363
364
365 void MR::sum_subs(size_t j, sub_nb A, double *sum_even, double *sum_odd) {
366 // j is a variable index
367 // A is a subset of nb[j]
368
369 // calculate sum over all even/odd subsets B of A of _tJ(j,B) appM(j,B)
370
371 *sum_even = 0.0;
372 *sum_odd = 0.0;
373
374 sub_nb S(A.size());
375 S.clear();
376 do { // for all subsets of A
377
378 // construct subset B of A corresponding to S
379 sub_nb B = A;
380 B.clear();
381 size_t Ssize = S.size();
382 for( size_t bit = 0; bit < Ssize; bit++ )
383 B += A[S[bit]];
384
385 if( S.size() % 2 )
386 *sum_odd += _tJ(j,B) * appM(j,B);
387 else
388 *sum_even += _tJ(j,B) * appM(j,B);
389
390 ++S;
391 } while( !S.empty() );
392 }
393
394
395 void MR::solvemcav() {
396 double sum_even, sum_odd;
397 double maxdev;
398 size_t maxruns = 1000;
399
400 makekindex();
401 for(size_t i=0; i<N; i++)
402 for(size_t _j=0; _j<con[i]; _j++)
403 M[i][_j]=0.1;
404
405 size_t run=0;
406 do {
407 maxdev=0.0;
408 run++;
409 for(size_t i=0; i<N; i++){ // for all i
410 for(size_t _j=0; _j<con[i]; _j++){ // for all j in N_i
411 size_t _i = kindex[i][_j];
412 size_t j = nb[i][_j];
413 assert( nb[j][_i] == i );
414
415 double newM = 0.0;
416 if( props.updates == Properties::UpdateType::FULL ) {
417 // find indices in nb[j] that do not correspond with i
418 sub_nb _nbj_min_i(con[j]);
419 _nbj_min_i -= kindex[i][_j];
420
421 // find indices in nb[i] that do not correspond with j
422 sub_nb _nbi_min_j(con[i]);
423 _nbi_min_j -= _j;
424
425 sum_subs(j, _nbj_min_i, &sum_even, &sum_odd);
426 newM = (tanh(theta[j]) * sum_even + sum_odd) / (sum_even + tanh(theta[j]) * sum_odd);
427
428 sum_subs(i, _nbi_min_j, &sum_even, &sum_odd);
429 double denom = sum_even + tanh(theta[i]) * sum_odd;
430 double numer = 0.0;
431 for(size_t _k=0; _k<con[i]; _k++) if(_k != _j) {
432 sum_subs(i, _nbi_min_j - _k, &sum_even, &sum_odd);
433 numer += tJ[i][_k] * cors[i][_j][_k] * (tanh(theta[i]) * sum_even + sum_odd);
434 }
435 newM -= numer / denom;
436 } else if( props.updates == Properties::UpdateType::LINEAR ) {
437 newM = T(j,_i);
438 for(size_t _l=0; _l<con[i]; _l++) if( _l != _j )
439 newM -= Omega(i,_j,_l) * tJ[i][_l] * cors[i][_j][_l];
440 for(size_t _l1=0; _l1<con[j]; _l1++) if( _l1 != _i )
441 for( size_t _l2=_l1+1; _l2<con[j]; _l2++) if( _l2 != _i)
442 newM += Gamma(j,_i,_l1,_l2) * tJ[j][_l1] * tJ[j][_l2] * cors[j][_l1][_l2];
443 }
444
445 double dev = newM - M[i][_j];
446 // dev *= 0.02;
447 if( fabs(dev) >= maxdev )
448 maxdev = fabs(dev);
449
450 newM = M[i][_j] + dev;
451 if( fabs(newM) > 1.0 )
452 newM = sign(newM);
453 M[i][_j] = newM;
454 }
455 }
456 } while((maxdev>props.tol)&&(run<maxruns));
457
458 if( maxdev > maxdiff )
459 maxdiff = maxdev;
460
461 if(run==maxruns){
462 if( props.verbose >= 1 )
463 cout << "solve_mcav: Convergence not reached (maxdev=" << maxdev << ")..." << endl;
464 }
465 }
466
467
468 void MR::solveM() {
469 for(size_t i=0; i<N; i++) {
470 if( props.updates == Properties::UpdateType::FULL ) {
471 // find indices in nb[i]
472 sub_nb _nbi(con[i]);
473
474 // calc numerator1 and denominator1
475 double sum_even, sum_odd;
476 sum_subs(i, _nbi, &sum_even, &sum_odd);
477
478 Mag[i] = (tanh(theta[i]) * sum_even + sum_odd) / (sum_even + tanh(theta[i]) * sum_odd);
479
480 } else if( props.updates == Properties::UpdateType::LINEAR ) {
481 sub_nb empty(con[i]);
482 empty.clear();
483 Mag[i] = T(i,empty);
484
485 for(size_t _l1=0; _l1<con[i]; _l1++)
486 for( size_t _l2=_l1+1; _l2<con[i]; _l2++)
487 Mag[i] += Gamma(i,_l1,_l2) * tJ[i][_l1] * tJ[i][_l2] * cors[i][_l1][_l2];
488 }
489 if(fabs(Mag[i])>1.)
490 Mag[i] = sign(Mag[i]);
491 }
492 }
493
494
495 void MR::init_cor() {
496 for( size_t i = 0; i < nrVars(); i++ ) {
497 vector<Factor> pairq;
498 if( props.inits == Properties::InitType::CLAMPING ) {
499 BP bpcav(*this, PropertySet()("updates",string("SEQMAX"))("tol", string("1e-9"))("maxiter", string("1000UL"))("verbose", string("0UL"))("logdomain", string("0")));
500 bpcav.makeCavity( i );
501 pairq = calcPairBeliefs( bpcav, delta(i), false );
502 } else if( props.inits == Properties::InitType::EXACT ) {
503 JTree jtcav(*this, PropertySet()("updates",string("HUGIN"))("verbose", 0UL) );
504 jtcav.makeCavity( i );
505 pairq = calcPairBeliefs( jtcav, delta(i), false );
506 }
507 for( size_t jk = 0; jk < pairq.size(); jk++ ) {
508 VarSet::const_iterator kit = pairq[jk].vars().begin();
509 size_t j = findVar( *(kit) );
510 size_t k = findVar( *(++kit) );
511 pairq[jk].normalize(Prob::NORMPROB);
512 double cor = (pairq[jk][3] - pairq[jk][2] - pairq[jk][1] + pairq[jk][0]) - (pairq[jk][3] + pairq[jk][2] - pairq[jk][1] - pairq[jk][0]) * (pairq[jk][3] - pairq[jk][2] + pairq[jk][1] - pairq[jk][0]);
513 for( size_t _j = 0; _j < con[i]; _j++ ) if( nb[i][_j] == j )
514 for( size_t _k = 0; _k < con[i]; _k++ ) if( nb[i][_k] == k ) {
515 cors[i][_j][_k] = cor;
516 cors[i][_k][_j] = cor;
517 }
518 }
519 }
520 }
521
522
523 string MR::identify() const {
524 stringstream result (stringstream::out);
525 result << Name << getProperties();
526 return result.str();
527 }
528
529
530 double MR::run() {
531 if( supported ) {
532 if( props.verbose >= 1 )
533 cout << "Starting " << identify() << "...";
534
535 double tic = toc();
536 // Diffs diffs(nrVars(), 1.0);
537
538 M.resize(N);
539 for(size_t i=0; i<N; i++)
540 M[i].resize(kmax);
541
542 cors.resize(N);
543 for(size_t i=0; i<N; i++)
544 cors[i].resize(kmax);
545 for(size_t i=0; i<N; i++)
546 for(size_t j=0; j<kmax; j++)
547 cors[i][j].resize(kmax);
548
549 kindex.resize(N);
550 for(size_t i=0; i<N; i++)
551 kindex[i].resize(kmax);
552
553 if( props.inits == Properties::InitType::RESPPROP ) {
554 double md = init_cor_resp();
555 if( md > maxdiff )
556 maxdiff = md;
557 } else if( props.inits == Properties::InitType::EXACT )
558 init_cor(); // FIXME no MaxDiff() calculation
559 else if( props.inits == Properties::InitType::CLAMPING )
560 init_cor(); // FIXME no MaxDiff() calculation
561
562 solvemcav();
563
564 Mag.resize(N);
565 solveM();
566
567 if( props.verbose >= 1 )
568 cout << "MR needed " << toc() - tic << " clocks." << endl;
569
570 return 0.0;
571 } else
572 return NAN;
573 }
574
575
576 void MR::makekindex() {
577 for(size_t i=0; i<N; i++)
578 for(size_t j=0; j<con[i]; j++) {
579 size_t ij = nb[i][j]; // ij is the j'th neighbour of spin i
580 size_t k=0;
581 while( nb[ij][k] != i )
582 k++;
583 kindex[i][j] = k; // the j'th neighbour of spin i has spin i as its k'th neighbour
584 }
585 }
586
587
588 Factor MR::belief( const Var &n ) const {
589 if( supported ) {
590 size_t i = findVar( n );
591
592 Real x[2];
593 x[0] = 0.5 - Mag[i] / 2.0;
594 x[1] = 0.5 + Mag[i] / 2.0;
595
596 return Factor( n, x );
597 } else
598 return Factor();
599 }
600
601
602 vector<Factor> MR::beliefs() const {
603 vector<Factor> result;
604 for( size_t i = 0; i < nrVars(); i++ )
605 result.push_back( belief( var(i) ) );
606 return result;
607 }
608
609
610
611 MR::MR( const FactorGraph &fg, const PropertySet &opts ) : DAIAlgFG(fg), supported(true), maxdiff(0.0) {
612 setProperties( opts );
613
614 // check whether all vars in fg are binary
615 // check whether connectivity is <= kmax
616 for( size_t i = 0; i < fg.nrVars(); i++ )
617 if( (fg.var(i).states() > 2) || (fg.delta(i).size() > kmax) ) {
618 supported = false;
619 break;
620 }
621
622 if( !supported )
623 return;
624
625 // check whether all interactions are pairwise or single
626 for( size_t I = 0; I < fg.nrFactors(); I++ )
627 if( fg.factor(I).vars().size() > 2 ) {
628 supported = false;
629 break;
630 }
631
632 if( !supported )
633 return;
634
635 // create w and th
636 size_t Nin = fg.nrVars();
637
638 double *w = new double[Nin*Nin];
639 double *th = new double[Nin];
640
641 for( size_t i = 0; i < Nin; i++ ) {
642 th[i] = 0.0;
643 for( size_t j = 0; j < Nin; j++ )
644 w[i*Nin+j] = 0.0;
645 }
646
647 for( size_t I = 0; I < fg.nrFactors(); I++ ) {
648 const Factor &psi = fg.factor(I);
649 if( psi.vars().size() == 1 ) {
650 size_t i = fg.findVar( *(psi.vars().begin()) );
651 th[i] += 0.5 * log(psi[1] / psi[0]);
652 } else if( psi.vars().size() == 2 ) {
653 size_t i = fg.findVar( *(psi.vars().begin()) );
654 VarSet::const_iterator jit = psi.vars().begin();
655 size_t j = fg.findVar( *(++jit) );
656
657 w[i*Nin+j] += 0.25 * log(psi[3] * psi[0] / (psi[2] * psi[1]));
658 w[j*Nin+i] += 0.25 * log(psi[3] * psi[0] / (psi[2] * psi[1]));
659
660 th[i] += 0.25 * log(psi[3] / psi[2] * psi[1] / psi[0]);
661 th[j] += 0.25 * log(psi[3] / psi[1] * psi[2] / psi[0]);
662 }
663 }
664
665 init(Nin, w, th);
666
667 delete th;
668 delete w;
669 }
670
671
672 } // end of namespace dai