Partial adoption of contributions by Giuseppe:
[libdai.git] / src / mr.cpp
1 /* Copyright (C) 2006-2008 Joris Mooij [j dot mooij at science dot ru dot nl]
2 Radboud University Nijmegen, The Netherlands
3
4 This file is part of libDAI.
5
6 libDAI is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 libDAI is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with libDAI; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21
22 #include <cstdio>
23 #include <ctime>
24 #include <cmath>
25 #include <cstdlib>
26 #include <dai/mr.h>
27 #include <dai/bp.h>
28 #include <dai/jtree.h>
29 #include <dai/util.h>
30 #include <dai/diffs.h>
31
32
33 namespace dai {
34
35
36 using namespace std;
37
38
39 const char *MR::Name = "MR";
40
41
42 bool MR::checkProperties() {
43 if( !HasProperty("updates") )
44 return false;
45 if( !HasProperty("inits") )
46 return false;
47 if( !HasProperty("verbose") )
48 return false;
49 if( !HasProperty("tol") )
50 return false;
51
52 ConvertPropertyTo<UpdateType>("updates");
53 ConvertPropertyTo<InitType>("inits");
54 ConvertPropertyTo<size_t>("verbose");
55 ConvertPropertyTo<double>("tol");
56
57 return true;
58 }
59
60
61 // init N, con, nb, tJ, theta
62 void MR::init(size_t _N, double *_w, double *_th) {
63 size_t i,j;
64
65 N = _N;
66
67 con.resize(N);
68 nb.resize(N);
69 tJ.resize(N);
70 for(i=0; i<N; i++ ) {
71 nb[i].resize(kmax);
72 tJ[i].resize(kmax);
73 con[i]=0;
74 for(j=0; j<N; j++ )
75 if( _w[i*N+j] != 0.0 ) {
76 nb[i][con[i]] = j;
77 tJ[i][con[i]] = tanh(_w[i*N+j]);
78 con[i]++;
79 }
80 }
81
82 theta.resize(N);
83 for(i=0; i<N; i++)
84 theta[i] = _th[i];
85 }
86
87
88 // calculate cors
89 double MR::init_cor_resp() {
90 size_t j,k,l, runx,i2;
91 double variab1, variab2;
92 double md, maxdev;
93 double thbJsite[kmax];
94 double xinter;
95 double rinter;
96 double res[kmax];
97 size_t s2;
98 size_t flag;
99 size_t concav;
100 size_t runs = 3000;
101 double eps = 0.2;
102 size_t cavity;
103
104 vector<vector<double> > tJ_org;
105 vector<vector<size_t> > nb_org;
106 vector<size_t> con_org;
107 vector<double> theta_org;
108
109 vector<double> xfield(N*kmax,0.0);
110 vector<double> rfield(N*kmax,0.0);
111 vector<double> Hfield(N,0.0);
112 vector<double> devs(N*kmax,0.0);
113 vector<double> devs2(N*kmax,0.0);
114 vector<double> dev(N,0.0);
115 vector<double> avmag(N,0.0);
116
117 // save original tJ, nb
118 nb_org = nb;
119 tJ_org = tJ;
120 con_org = con;
121 theta_org = theta;
122
123 maxdev = 0.0;
124 for(cavity=0; cavity<N; cavity++){ // for each spin to be removed
125 con = con_org;
126 concav=con[cavity];
127
128 nb = nb_org;
129 tJ = tJ_org;
130
131 // Adapt the graph variables nb[], tJ[] and con[]
132 for(size_t i=0; i<con[cavity]; i++) {
133 size_t ij = nb[cavity][i];
134 flag=0;
135 j=0;
136 do{
137 if(nb[ij][j]==cavity){
138 while(j<(con[ij]-1)){
139 nb[ij][j]=nb[ij][j+1];
140 tJ[ij][j] = tJ[ij][j+1];
141 j++;
142 }
143 flag=1;
144 }
145 j++;
146 } while(flag==0);
147 }
148 for(size_t i=0; i<con[cavity]; i++)
149 con[nb[cavity][i]]--;
150 con[cavity] = 0;
151
152
153 // Do everything starting from the new graph********
154
155 makekindex();
156 theta = theta_org;
157
158 for(size_t i=0; i<kmax*N; i++)
159 xfield[i] = 3.0*(2*rnd_uniform()-1.);
160
161 for(i2=0; i2<concav; i2++){ // Subsequently apply a field to each cavity spin ****************
162
163 s2 = nb[cavity][i2]; // identify the index of the cavity spin
164 for(size_t i=0; i<con[s2]; i++)
165 rfield[kmax*s2+i] = 1.;
166
167 runx=0;
168 do { // From here start the response and belief propagation
169 runx++;
170 md=0.0;
171 for(k=0; k<N; k++){
172 if(k!=cavity) {
173 for(size_t i=0; i<con[k]; i++)
174 thbJsite[i] = tJ[k][i];
175 for(l=0; l<con[k]; l++){
176 xinter = 1.;
177 rinter = 0.;
178 if(k==s2) rinter += 1.;
179 for(j=0; j<con[k]; j++)
180 if(j!=l){
181 variab2 = tanh(xfield[kmax*nb[k][j]+kindex[k][j]]);
182 variab1 = thbJsite[j]*variab2;
183 xinter *= (1.+variab1)/(1.-variab1);
184
185 rinter += thbJsite[j]*rfield[kmax*nb[k][j]+kindex[k][j]]*(1-variab2*variab2)/(1-variab1*variab1);
186 }
187
188 variab1 = 0.5*log(xinter);
189 xinter = variab1 + theta[k];
190 devs[kmax*k+l] = xinter-xfield[kmax*k+l];
191 xfield[kmax*k+l] = xfield[kmax*k+l]+devs[kmax*k+l]*eps;
192 if( fabs(devs[kmax*k+l]) > md )
193 md = fabs(devs[kmax*k+l]);
194
195 devs2[kmax*k+l] = rinter-rfield[kmax*k+l];
196 rfield[kmax*k+l] = rfield[kmax*k+l]+devs2[kmax*k+l]*eps;
197 if( fabs(devs2[kmax*k+l]) > md )
198 md = fabs(devs2[kmax*k+l]);
199 }
200 }
201 }
202 } while((md > Tol())&&(runx<runs)); // Precision condition reached -> BP and RP finished
203 if(runx==runs)
204 if( Verbose() >= 2 )
205 cout << "init_cor_resp: Convergence not reached (md=" << md << ")..." << endl;
206 if(md > maxdev)
207 maxdev = md;
208
209 // compute the observables (i.e. magnetizations and responses)******
210
211 for(size_t i=0; i<concav; i++){
212 rinter = 0.;
213 xinter = 1.;
214 if(i!=i2)
215 for(j=0; j<con[nb[cavity][i]]; j++){
216 variab2 = tanh(xfield[kmax*nb[nb[cavity][i]][j]+kindex[nb[cavity][i]][j]]);
217 variab1 = tJ[nb[cavity][i]][j]*variab2;
218 rinter += tJ[nb[cavity][i]][j]*rfield[kmax*nb[nb[cavity][i]][j]+kindex[nb[cavity][i]][j]]*(1-variab2*variab2)/(1-variab1*variab1);
219 xinter *= (1.+variab1)/(1.-variab1);
220 }
221 xinter = tanh(0.5*log(xinter)+theta[nb[cavity][i]]);
222 res[i] = rinter*(1-xinter*xinter);
223 }
224
225 // *******************
226
227 for(size_t i=0; i<concav; i++)
228 if(nb[cavity][i]!=s2)
229 // if(i!=i2)
230 cors[cavity][i2][i] = res[i];
231 else
232 cors[cavity][i2][i] = 0;
233 } // close for i2 = 0...concav
234 }
235
236 // restore nb, tJ, con
237 tJ = tJ_org;
238 nb = nb_org;
239 con = con_org;
240 theta = theta_org;
241
242 return maxdev;
243 }
244
245
246 double MR::T(size_t i, sub_nb A) {
247 // i is a variable index
248 // A is a subset of nb[i]
249 //
250 // calculate T{(i)}_A as defined in Rizzo&Montanari e-print (2.17)
251
252 sub_nb _nbi_min_A(con[i]);
253 for( size_t __j = 0; __j < A.size(); __j++ )
254 _nbi_min_A -= A[__j];
255
256 double res = theta[i];
257 for( size_t __j = 0; __j < _nbi_min_A.size(); __j++ ) {
258 size_t _j = _nbi_min_A[__j];
259 res += atanh(tJ[i][_j] * M[i][_j]);
260 }
261 return tanh(res);
262 }
263
264
265 double MR::T(size_t i, size_t _j) {
266 sub_nb j(con[i]);
267 j.clear();
268 j += _j;
269 return T(i,j);
270 }
271
272
273 double MR::Omega(size_t i, size_t _j, size_t _l) {
274 sub_nb jl(con[i]);
275 jl.clear();
276 jl += _j;
277 jl += _l;
278 double Tijl = T(i,jl);
279 return Tijl / (1.0 + tJ[i][_l] * M[i][_l] * Tijl);
280 }
281
282
283 double MR::Gamma(size_t i, size_t _j, size_t _l1, size_t _l2) {
284 sub_nb jll(con[i]);
285 jll.clear();
286 jll += _j;
287 double Tij = T(i,jll);
288 jll += _l1;
289 jll += _l2;
290 double Tijll = T(i,jll);
291
292 return (Tijll - Tij) / (1.0 + tJ[i][_l1] * tJ[i][_l2] * M[i][_l1] * M[i][_l2] + tJ[i][_l1] * M[i][_l1] * Tijll + tJ[i][_l2] * M[i][_l2] * Tijll);
293 }
294
295
296 double MR::Gamma(size_t i, size_t _l1, size_t _l2) {
297 sub_nb ll(con[i]);
298 ll.clear();
299 double Ti = T(i,ll);
300 ll += _l1;
301 ll += _l2;
302 double Till = T(i,ll);
303
304 return (Till - Ti) / (1.0 + tJ[i][_l1] * tJ[i][_l2] * M[i][_l1] * M[i][_l2] + tJ[i][_l1] * M[i][_l1] * Till + tJ[i][_l2] * M[i][_l2] * Till);
305 }
306
307
308 double MR::_tJ(size_t i, sub_nb A) {
309 // i is a variable index
310 // A is a subset of nb[i]
311 //
312 // calculate the product of all tJ[i][_j] for _j in A
313
314 size_t Asize = A.size();
315 switch( Asize ) {
316 case 0: return 1.0;
317 // case 1: return tJ[i][A[0]];
318 // case 2: return tJ[i][A[0]] * tJ[i][A[1]];
319 // case 3: return tJ[i][A[0]] * tJ[i][A[1]] * tJ[i][A[2]];
320 default:
321 size_t __j = Asize - 1;
322 size_t _j = A[__j];
323 sub_nb A_j = A - _j;
324 return tJ[i][_j] * _tJ(i, A_j);
325 }
326
327 }
328
329
330 double MR::appM(size_t i, sub_nb A) {
331 // i is a variable index
332 // A is a subset of nb[i]
333 //
334 // calculate the moment of variables in A from M and cors, neglecting higher order cumulants,
335 // defined as the sum over all partitions of A into subsets of cardinality two at most of the
336 // product of the cumulants (either first order, i.e. M, or second order, i.e. cors) of the
337 // entries of the partitions
338
339 size_t Asize = A.size();
340 switch( Asize ) {
341 case 0: return 1.0;
342 // case 1: return M[i][A[0]];
343 // case 2: return M[i][A[0]] * M[i][A[1]] + cors[i][A[0]][A[1]];
344 // case 3: return M[i][A[0]] * M[i][A[1]] * M[i][A[2]] + M[i][A[0]] * cors[i][A[1]][A[2]] + M[i][A[1]] * cors[i][A[0]][A[2]] + M[i][A[2]] * cors[i][A[0]][A[1]];
345 default:
346 size_t __j = Asize - 1;
347 size_t _j = A[__j];
348 sub_nb A_j = A - _j;
349
350 double result = M[i][_j] * appM(i, A_j);
351 for( size_t __k = 0; __k < __j; __k++ ) {
352 size_t _k = A[__k];
353 result += cors[i][_j][_k] * appM(i,A_j - _k);
354 }
355
356 return result;
357 }
358 }
359
360
361 void MR::sum_subs(size_t j, sub_nb A, double *sum_even, double *sum_odd) {
362 // j is a variable index
363 // A is a subset of nb[j]
364
365 // calculate sum over all even/odd subsets B of A of _tJ(j,B) appM(j,B)
366
367 *sum_even = 0.0;
368 *sum_odd = 0.0;
369
370 sub_nb S(A.size());
371 S.clear();
372 do { // for all subsets of A
373
374 // construct subset B of A corresponding to S
375 sub_nb B = A;
376 B.clear();
377 size_t Ssize = S.size();
378 for( size_t bit = 0; bit < Ssize; bit++ )
379 B += A[S[bit]];
380
381 if( S.size() % 2 )
382 *sum_odd += _tJ(j,B) * appM(j,B);
383 else
384 *sum_even += _tJ(j,B) * appM(j,B);
385
386 ++S;
387 } while( !S.empty() );
388 }
389
390
391 void MR::solvemcav() {
392 double sum_even, sum_odd;
393 double maxdev;
394 size_t maxruns = 1000;
395
396 makekindex();
397 for(size_t i=0; i<N; i++)
398 for(size_t _j=0; _j<con[i]; _j++)
399 M[i][_j]=0.1;
400
401 size_t run=0;
402 do {
403 maxdev=0.0;
404 run++;
405 for(size_t i=0; i<N; i++){ // for all i
406 for(size_t _j=0; _j<con[i]; _j++){ // for all j in N_i
407 size_t _i = kindex[i][_j];
408 size_t j = nb[i][_j];
409 assert( nb[j][_i] == i );
410
411 double newM = 0.0;
412 if( Updates() == UpdateType::FULL ) {
413 // find indices in nb[j] that do not correspond with i
414 sub_nb _nbj_min_i(con[j]);
415 _nbj_min_i -= kindex[i][_j];
416
417 // find indices in nb[i] that do not correspond with j
418 sub_nb _nbi_min_j(con[i]);
419 _nbi_min_j -= _j;
420
421 sum_subs(j, _nbj_min_i, &sum_even, &sum_odd);
422 newM = (tanh(theta[j]) * sum_even + sum_odd) / (sum_even + tanh(theta[j]) * sum_odd);
423
424 sum_subs(i, _nbi_min_j, &sum_even, &sum_odd);
425 double denom = sum_even + tanh(theta[i]) * sum_odd;
426 double numer = 0.0;
427 for(size_t _k=0; _k<con[i]; _k++) if(_k != _j) {
428 sum_subs(i, _nbi_min_j - _k, &sum_even, &sum_odd);
429 numer += tJ[i][_k] * cors[i][_j][_k] * (tanh(theta[i]) * sum_even + sum_odd);
430 }
431 newM -= numer / denom;
432 } else if( Updates() == UpdateType::LINEAR ) {
433 newM = T(j,_i);
434 for(size_t _l=0; _l<con[i]; _l++) if( _l != _j )
435 newM -= Omega(i,_j,_l) * tJ[i][_l] * cors[i][_j][_l];
436 for(size_t _l1=0; _l1<con[j]; _l1++) if( _l1 != _i )
437 for( size_t _l2=_l1+1; _l2<con[j]; _l2++) if( _l2 != _i)
438 newM += Gamma(j,_i,_l1,_l2) * tJ[j][_l1] * tJ[j][_l2] * cors[j][_l1][_l2];
439 }
440
441 double dev = newM - M[i][_j];
442 // dev *= 0.02;
443 if( fabs(dev) >= maxdev )
444 maxdev = fabs(dev);
445
446 newM = M[i][_j] + dev;
447 if( fabs(newM) > 1.0 )
448 newM = sign(newM);
449 M[i][_j] = newM;
450 }
451 }
452 } while((maxdev>Tol())&&(run<maxruns));
453
454 updateMaxDiff( maxdev );
455
456 if(run==maxruns){
457 if( Verbose() >= 1 )
458 cout << "solve_mcav: Convergence not reached (maxdev=" << maxdev << ")..." << endl;
459 }
460 }
461
462
463 void MR::solveM() {
464 for(size_t i=0; i<N; i++) {
465 if( Updates() == UpdateType::FULL ) {
466 // find indices in nb[i]
467 sub_nb _nbi(con[i]);
468
469 // calc numerator1 and denominator1
470 double sum_even, sum_odd;
471 sum_subs(i, _nbi, &sum_even, &sum_odd);
472
473 Mag[i] = (tanh(theta[i]) * sum_even + sum_odd) / (sum_even + tanh(theta[i]) * sum_odd);
474
475 } else if( Updates() == UpdateType::LINEAR ) {
476 sub_nb empty(con[i]);
477 empty.clear();
478 Mag[i] = T(i,empty);
479
480 for(size_t _l1=0; _l1<con[i]; _l1++)
481 for( size_t _l2=_l1+1; _l2<con[i]; _l2++)
482 Mag[i] += Gamma(i,_l1,_l2) * tJ[i][_l1] * tJ[i][_l2] * cors[i][_l1][_l2];
483 }
484 if(fabs(Mag[i])>1.)
485 Mag[i] = sign(Mag[i]);
486 }
487 }
488
489
490 void MR::init_cor() {
491 for( size_t i = 0; i < nrVars(); i++ ) {
492 vector<Factor> pairq;
493 if( Inits() == InitType::CLAMPING ) {
494 BP bpcav(*this, Properties()("updates",string("SEQMAX"))("tol", string("1e-9"))("maxiter", string("1000UL"))("verbose", string("0UL")));
495 bpcav.makeCavity( var(i) );
496 pairq = calcPairBeliefs( bpcav, delta(var(i)), false );
497 } else if( Inits() == InitType::EXACT ) {
498 JTree jtcav(*this, Properties()("updates",string("HUGIN"))("verbose", string("0UL")) );
499 jtcav.makeCavity( var(i) );
500 pairq = calcPairBeliefs( jtcav, delta(var(i)), false );
501 }
502 for( size_t jk = 0; jk < pairq.size(); jk++ ) {
503 VarSet::const_iterator kit = pairq[jk].vars().begin();
504 size_t j = findVar( *(kit) );
505 size_t k = findVar( *(++kit) );
506 pairq[jk].normalize(Prob::NORMPROB);
507 double cor = (pairq[jk][3] - pairq[jk][2] - pairq[jk][1] + pairq[jk][0]) - (pairq[jk][3] + pairq[jk][2] - pairq[jk][1] - pairq[jk][0]) * (pairq[jk][3] - pairq[jk][2] + pairq[jk][1] - pairq[jk][0]);
508 for( size_t _j = 0; _j < con[i]; _j++ ) if( nb[i][_j] == j )
509 for( size_t _k = 0; _k < con[i]; _k++ ) if( nb[i][_k] == k ) {
510 cors[i][_j][_k] = cor;
511 cors[i][_k][_j] = cor;
512 }
513 }
514 }
515 }
516
517
518 string MR::identify() const {
519 stringstream result (stringstream::out);
520 result << Name << GetProperties();
521 return result.str();
522 }
523
524
525 double MR::run() {
526 if( supported ) {
527 if( Verbose() >= 1 )
528 cout << "Starting " << identify() << "...";
529
530 clock_t tic = toc();
531 // Diffs diffs(nrVars(), 1.0);
532
533 M.resize(N);
534 for(size_t i=0; i<N; i++)
535 M[i].resize(kmax);
536
537 cors.resize(N);
538 for(size_t i=0; i<N; i++)
539 cors[i].resize(kmax);
540 for(size_t i=0; i<N; i++)
541 for(size_t j=0; j<kmax; j++)
542 cors[i][j].resize(kmax);
543
544 kindex.resize(N);
545 for(size_t i=0; i<N; i++)
546 kindex[i].resize(kmax);
547
548 if( Inits() == InitType::RESPPROP )
549 updateMaxDiff( init_cor_resp() );
550 else if( Inits() == InitType::EXACT )
551 init_cor(); // FIXME no MaxDiff() calculation
552 else if( Inits() == InitType::CLAMPING )
553 init_cor(); // FIXME no MaxDiff() calculation
554
555 solvemcav();
556
557 Mag.resize(N);
558 solveM();
559
560 if( Verbose() >= 1 )
561 cout << "MR needed " << toc() - tic << " clocks." << endl;
562
563 return 0.0;
564 } else
565 return NAN;
566 }
567
568
569 void MR::makekindex() {
570 for(size_t i=0; i<N; i++)
571 for(size_t j=0; j<con[i]; j++) {
572 size_t ij = nb[i][j]; // ij is the j'th neighbour of spin i
573 size_t k=0;
574 while( nb[ij][k] != i )
575 k++;
576 kindex[i][j] = k; // the j'th neighbour of spin i has spin i as its k'th neighbour
577 }
578 }
579
580
581 Factor MR::belief( const Var &n ) const {
582 if( supported ) {
583 size_t i = findVar( n );
584
585 Real x[2];
586 x[0] = 0.5 - Mag[i] / 2.0;
587 x[1] = 0.5 + Mag[i] / 2.0;
588
589 return Factor( n, x );
590 } else
591 return Factor();
592 }
593
594
595 vector<Factor> MR::beliefs() const {
596 vector<Factor> result;
597 for( size_t i = 0; i < nrVars(); i++ )
598 result.push_back( belief( var(i) ) );
599 return result;
600 }
601
602
603
604 MR::MR( const FactorGraph &fg, const Properties &opts ) : DAIAlgFG(fg, opts), supported(true) {
605 // check whether all vars in fg are binary
606 // check whether connectivity is <= kmax
607 for( size_t i = 0; i < fg.nrVars(); i++ )
608 if( (fg.var(i).states() > 2) || (fg.delta(fg.var(i)).size() > kmax) ) {
609 supported = false;
610 break;
611 }
612
613 if( !supported )
614 return;
615
616 // check whether all interactions are pairwise or single
617 for( size_t I = 0; I < fg.nrFactors(); I++ )
618 if( fg.factor(I).vars().size() > 2 ) {
619 supported = false;
620 break;
621 }
622
623 if( !supported )
624 return;
625
626 // create w and th
627 size_t _N = fg.nrVars();
628
629 double *w = new double[_N*_N];
630 double *th = new double[_N];
631
632 for( size_t i = 0; i < _N; i++ ) {
633 th[i] = 0.0;
634 for( size_t j = 0; j < _N; j++ )
635 w[i*_N+j] = 0.0;
636 }
637
638 for( size_t I = 0; I < fg.nrFactors(); I++ ) {
639 const Factor &psi = fg.factor(I);
640 if( psi.vars().size() == 1 ) {
641 size_t i = fg.findVar( *(psi.vars().begin()) );
642 th[i] += 0.5 * log(psi[1] / psi[0]);
643 } else if( psi.vars().size() == 2 ) {
644 size_t i = fg.findVar( *(psi.vars().begin()) );
645 VarSet::const_iterator jit = psi.vars().begin();
646 size_t j = fg.findVar( *(++jit) );
647
648 w[i*_N+j] += 0.25 * log(psi[3] * psi[0] / (psi[2] * psi[1]));
649 w[j*_N+i] += 0.25 * log(psi[3] * psi[0] / (psi[2] * psi[1]));
650
651 th[i] += 0.25 * log(psi[3] / psi[2] * psi[1] / psi[0]);
652 th[j] += 0.25 * log(psi[3] / psi[1] * psi[2] / psi[0]);
653 }
654 }
655
656 init(_N, w, th);
657
658 delete th;
659 delete w;
660 }
661
662
663 } // end of namespace dai