Some improvements to the new BP_dual/BBP/CBP code to make it compile
[libdai.git] / src / bbp.cpp
1 #include <dai/bp.h>
2 #include <dai/bbp.h>
3 #include <dai/gibbs.h>
4
5 // for makeBBPGraph: {
6 #include <sys/stat.h>
7 #include <sys/types.h>
8 #include <iostream>
9 #include <fstream>
10 // }
11
12 namespace dai {
13
14 using namespace std;
15
16 #define rnd_multi(x) rnd_int(0,(x)-1)
17
18 /// function to compute \sld\~w from w, Z_w, \sld w
19 Prob unnormAdjoint(const Prob &w, Real Z_w, const Prob &adj_w) {
20 assert(w.size()==adj_w.size());
21 Prob adj_w_unnorm(w.size(),0.0);
22 Real s=0.0;
23 for(size_t i=0; i<w.size(); i++) {
24 s += w[i]*adj_w[i];
25 }
26 for(size_t i=0; i<w.size(); i++) {
27 adj_w_unnorm[i] = (adj_w[i]-s)/Z_w;
28 }
29 return adj_w_unnorm;
30 }
31
32 /// Function to turn Gibbs state into b1_adj
33 /// calls bbp.init with calculated adjoints
34 void gibbsInitBBPCostFnAdj(BBP& bbp, const BP_dual &fg, bbp_cfn_t cfn_type, const vector<size_t>* stateP) {
35 if(cfn_type==bbp_cfn_t::bbp_cfn_t::cfn_bethe_ent) {
36 vector<Prob> b1_adj;
37 vector<Prob> b2_adj;
38 vector<Prob> psi1_adj;
39 vector<Prob> psi2_adj;
40 b1_adj.reserve(fg.nrVars());
41 psi1_adj.reserve(fg.nrVars());
42 b2_adj.reserve(fg.nrFactors());
43 psi2_adj.reserve(fg.nrFactors());
44 for(size_t i=0; i<fg.nrVars(); i++) {
45 size_t dim = fg.var(i).states();
46 int c = fg.nbV(i).size();
47 Prob p(dim,0.0);
48 for(size_t xi=0; xi<dim; xi++) {
49 p[xi] = (1-c)*(1+log(fg.belief1(i)[xi]));
50 }
51 b1_adj.push_back(p);
52
53 for(size_t xi=0; xi<dim; xi++) {
54 p[xi] = -fg.belief1(i)[xi];
55 }
56 psi1_adj.push_back(p);
57 }
58 for(size_t I=0; I<fg.nrFactors(); I++) {
59 size_t dim = fg.factor(I).states();
60 Prob p(dim,0.0);
61 for(size_t xI=0; xI<dim; xI++) {
62 p[xI] = 1 + log(fg.belief2(I)[xI]/fg.factor(I).p()[xI]);
63 }
64 b2_adj.push_back(p);
65
66 for(size_t xI=0; xI<dim; xI++) {
67 p[xI] = -fg.belief2(I)[xI]/fg.factor(I).p()[xI];
68 }
69 psi2_adj.push_back(p);
70 }
71 bbp.init(b1_adj, b2_adj, psi1_adj, psi2_adj);
72 } else if(cfn_type==bbp_cfn_t::cfn_factor_ent) {
73 vector<Prob> b2_adj;
74 b2_adj.reserve(fg.nrFactors());
75 for(size_t I=0; I<fg.nrFactors(); I++) {
76 size_t dim = fg.factor(I).states();
77 Prob p(dim,0.0);
78 for(size_t xI=0; xI<dim; xI++) {
79 double bIxI = fg.belief2(I)[xI];
80 if(bIxI<1.0e-15) {
81 p[xI] = -1.0e10;
82 } else {
83 p[xI] = 1+log(bIxI);
84 }
85 }
86 b2_adj.push_back(p);
87 }
88 bbp.init(get_zero_adj_1(fg), b2_adj);
89 } else if(cfn_type==bbp_cfn_t::cfn_var_ent) {
90 vector<Prob> b1_adj;
91 b1_adj.reserve(fg.nrVars());
92 for(size_t i=0; i<fg.nrVars(); i++) {
93 size_t dim = fg.var(i).states();
94 Prob p(dim,0.0);
95 for(size_t xi=0; xi<fg.var(i).states(); xi++) {
96 double bixi = fg.belief1(i)[xi];
97 if(bixi<1.0e-15) {
98 p[xi] = -1.0e10;
99 } else {
100 p[xi] = 1+log(bixi);
101 }
102 }
103 b1_adj.push_back(p);
104 }
105 bbp.init(b1_adj);
106 } else { // cost functions that use Gibbs sample: cfn_b, cfn_b2, cfn_exp
107 vector<size_t> state;
108 if(stateP==NULL) {
109 state = getGibbsState(fg,2*fg.doneIters());
110 } else {
111 state = *stateP;
112 }
113 assert(state.size()==fg.nrVars());
114
115 vector<Prob> b1_adj;
116 b1_adj.reserve(fg.nrVars());
117 for(size_t i=0; i<state.size(); i++) {
118 size_t n = fg.var(i).states();
119 Prob delta(n,0.0);
120 assert(/*0<=state[i] &&*/ state[i] < n);
121 double b = fg.belief1(i)[state[i]];
122 if(cfn_type==bbp_cfn_t::cfn_b) {
123 delta[state[i]] = 1.0;
124 } else if(cfn_type==bbp_cfn_t::cfn_b2) {
125 delta[state[i]] = b;
126 } else if(cfn_type==bbp_cfn_t::cfn_exp) {
127 delta[state[i]] = exp(b);
128 } else { abort(); }
129 b1_adj.push_back(delta);
130 }
131 bbp.init(b1_adj);
132 }
133 }
134
135 /// This function returns the actual value of the cost function whose
136 /// gradient with respect to singleton beliefs is given by
137 /// gibbsToB1Adj on the same arguments
138 Real gibbsCostFn(const BP_dual &fg, bbp_cfn_t cfn_type, const vector<size_t> *stateP) {
139 double cf=0.0;
140 if(cfn_type==bbp_cfn_t::cfn_bethe_ent) { // ignores state
141 cf = -fg.logZ();
142 } else if(cfn_type==bbp_cfn_t::cfn_var_ent) { // ignores state
143 for(size_t i=0; i<fg.nrVars(); i++) {
144 cf += -fg.belief1(i).entropy();
145 }
146 } else {
147 assert(stateP != NULL);
148 vector<size_t> state = *stateP;
149 assert(state.size()==fg.nrVars());
150 for(size_t i=0; i<fg.nrVars(); i++) {
151 double b = fg.belief1(i)[state[i]];
152 if(cfn_type==bbp_cfn_t::cfn_b) {
153 cf += b;
154 } else if(cfn_type==bbp_cfn_t::cfn_b2) {
155 cf += b*b/2;
156 } else if(cfn_type==bbp_cfn_t::cfn_exp) {
157 cf += exp(b);
158 } else { abort(); }
159 }
160 }
161 return cf;
162 }
163
164 vector<Prob> get_zero_adj_2(const BP_dual& bp_dual) {
165 vector<Prob> adj_2;
166 adj_2.reserve(bp_dual.nrFactors());
167 for(size_t I=0; I<bp_dual.nrFactors(); I++) {
168 adj_2.push_back(Prob(bp_dual.factor(I).states(),Real(0.0)));
169 }
170 return adj_2;
171 }
172
173 vector<Prob> get_zero_adj_1(const BP_dual& bp_dual) {
174 vector<Prob> adj_1;
175 adj_1.reserve(bp_dual.nrVars());
176 for(size_t i=0; i<bp_dual.nrVars(); i++) {
177 adj_1.push_back(Prob(bp_dual.var(i).states(),Real(0.0)));
178 }
179 return adj_1;
180 }
181
182 #define foreach_iI(_fg) \
183 for(size_t i=0, I=0, iI=0; (iI<_fg->nr_edges()) && ((i=_fg->edge(iI).first) || 1) && ((I=_fg->edge(iI).second) || 1); iI++)
184
185 #define foreach_iIj(_fg) \
186 for(size_t i=0, I=0, j=0, iIj=0; (iIj<_VFV_ind.size()) && \
187 ((i=get<0>(_VFV_ind[iIj])) || 1) && \
188 ((I=get<1>(_VFV_ind[iIj])) || 1) && \
189 ((j=get<2>(_VFV_ind[iIj])) || 1); iIj++)
190
191 #define foreach_IiJ(_fg) \
192 for(size_t I=0, i=0, J=0, IiJ=0; (IiJ<_FVF_ind.size()) && \
193 ((I=get<0>(_FVF_ind[IiJ])) || 1) && \
194 ((i=get<1>(_FVF_ind[IiJ])) || 1) && \
195 ((J=get<2>(_FVF_ind[IiJ])) || 1); IiJ++)
196
197
198 #define LOOP_ij(body) { \
199 size_t i_states = _fg->var(i).states(); \
200 size_t j_states = _fg->var(j).states(); \
201 if(_fg->var(i) > _fg->var(j)) { \
202 size_t xij=0; \
203 for(size_t xi=0; xi<i_states; xi++) { \
204 for(size_t xj=0; xj<j_states; xj++) { \
205 body; \
206 xij++; \
207 } \
208 } \
209 } else { \
210 size_t xij=0; \
211 for(size_t xj=0; xj<j_states; xj++) { \
212 for(size_t xi=0; xi<i_states; xi++) { \
213 body; \
214 xij++; \
215 } \
216 } \
217 } \
218 }
219
220 void BBP::RegenerateInds() {
221 // initialise _VFV and _VFV_ind
222 {
223 size_t ind=0;
224 _VFV.resize(_fg->nrVars());
225 _VFV_ind.clear();
226 for(size_t i=0; i<_fg->nrVars(); i++) {
227 foreach(size_t I, _fg->nbV(i)) {
228 vector<size_t>& v = _VFV[i][I];
229 v.resize(_fg->nrVars());
230 foreach(size_t j, _fg->nbF(I)) {
231 if(j!=i) {
232 v[j] = ind++;
233 _VFV_ind.push_back(tuple<size_t,size_t,size_t>(i,I,j));
234 }
235 }
236 }
237 }
238 }
239 // initialise _FVF
240 {
241 size_t ind=0;
242 _FVF.resize(_fg->nrFactors());
243 _FVF_ind.clear();
244 for(size_t I=0; I<_fg->nrFactors(); I++) {
245 foreach(size_t i, _fg->nbF(I)) {
246 vector<size_t>& v = _FVF[I][i];
247 v.resize(_fg->nrFactors());
248 foreach(size_t J, _fg->nbV(i)) {
249 if(J!=I) {
250 v[J] = ind++;
251 _FVF_ind.push_back(tuple<size_t,size_t,size_t>(I,i,J));
252 }
253 }
254 }
255 }
256 }
257 }
258
259 void BBP::RegenerateT() {
260 _T.clear();
261 foreach_iI(_fg) {
262 Prob prod(_fg->var(i).states(),1.0);
263 foreach(size_t J, _fg->nbV(i)) {
264 if(J!=I) {
265 prod *= _bp_dual->msgM(J,i);
266 }
267 }
268 _T.push_back(prod);
269 }
270 }
271
272 void BBP::RegenerateU() {
273 _U.clear();
274 foreach_iI(_fg) {
275 // Prob prod(_fg->var(i).states(), 1.0);
276 Prob prod(_fg->factor(I).states(), 1.0);
277 foreach(size_t j, _fg->nbF(I)) {
278 if(i!=j) {
279 Prob n_jI(_bp_dual->msgN(j,I));
280 const BP_dual::_ind_t* ind = &(_bp_dual->index(j,I));
281 // multiply prod with n_jI
282 for(size_t x_I = 0; x_I < prod.size(); x_I++)
283 prod[x_I] *= n_jI[(*ind)[x_I]];
284 // prod *= _bp_dual->msgN(j,I);
285 }
286 }
287 _U.push_back(prod);
288 }
289 }
290
291 void BBP::RegenerateS() {
292 _S.clear();
293 foreach_iIj(_fg) {
294 Factor prod(_fg->factor(I));
295 foreach(size_t k, _fg->nbF(I)) {
296 if(k!=i && k!=j) {
297 if(0) {
298 prod *= Factor(_fg->var(k), _bp_dual->msgN(k,I));
299 } else {
300 const BP_dual::_ind_t& ind = _bp_dual->index(k,I);
301 Prob p(_bp_dual->msgN(k,I));
302 for(size_t xI=0; xI<prod.states(); xI++) {
303 prod.p()[xI] *= p[ind[xI]];
304 }
305 }
306 }
307 }
308 // XXX improve efficiency?
309 // "Marginalize" onto i|j (unnormalized)
310 Prob marg;
311 marg = prod.marginal(VarSet(_fg->var(i)) | VarSet(_fg->var(j)), false).p();
312 _S.push_back(marg);
313 }
314 }
315
316 void BBP::RegenerateR() {
317 _R.clear();
318 foreach_IiJ(_fg) {
319 Prob prod(_fg->var(i).states(), 1.0);
320 foreach(size_t K, _fg->nbV(i)) {
321 if(K!=I && K!=J) {
322 prod *= _bp_dual->msgM(K,i);
323 }
324 }
325 _R.push_back(prod);
326 }
327 }
328
329 void BBP::RegenerateInputs() {
330 _adj_b_1_unnorm.clear();
331 for(size_t i=0; i<_fg->nrVars(); i++) {
332 _adj_b_1_unnorm.push_back(unnormAdjoint(_bp_dual->belief1(i).p(), _bp_dual->belief1Z(i), _adj_b_1[i]));
333 }
334 _adj_b_2_unnorm.clear();
335 for(size_t I=0; I<_fg->nrFactors(); I++) {
336 _adj_b_2_unnorm.push_back(unnormAdjoint(_bp_dual->belief2(I).p(), _bp_dual->belief2Z(I), _adj_b_2[I]));
337 }
338 }
339
340 /// initialise members for factor adjoints (call after RegenerateInputs)
341 void BBP::RegeneratePsiAdjoints() {
342 _adj_psi_1.clear();
343 _adj_psi_1.reserve(_fg->nrVars());
344 for(size_t i=0; i<_fg->nrVars(); i++) {
345 Prob p(_adj_b_1_unnorm[i]);
346 assert(p.size()==_fg->var(i).states());
347 foreach(size_t I, _fg->nbV(i)) {
348 p *= _bp_dual->msgM(I,i);
349 }
350 p += _init_adj_psi_1[i];
351 _adj_psi_1.push_back(p);
352 }
353 _adj_psi_2.clear();
354 _adj_psi_2.reserve(_fg->nrFactors());
355 for(size_t I=0; I<_fg->nrFactors(); I++) {
356 Prob p(_adj_b_2_unnorm[I]);
357 assert(p.size()==_fg->factor(I).states());
358 foreach(size_t i, _fg->nbF(I)) {
359 Prob n_iI(_bp_dual->msgN(i,I));
360 const BP_dual::_ind_t* ind = &(_bp_dual->index(i,I));
361 // multiply prod with n_jI
362 for(size_t x_I = 0; x_I < p.size(); x_I++)
363 p[x_I] *= n_iI[(*ind)[x_I]];
364 }
365 p += _init_adj_psi_2[I];
366 _adj_psi_2.push_back(p);
367 }
368 }
369
370 /// initialise members for messages adjoints (call after RegenerateInputs)
371 void BBP::RegenerateMessageAdjoints() {
372 _adj_n.resize(_fg->nr_edges());
373 _adj_m.resize(_fg->nr_edges());
374 _adj_n_unnorm.resize(_fg->nr_edges());
375 _adj_m_unnorm.resize(_fg->nr_edges());
376 _new_adj_n.clear();
377 _new_adj_n.reserve(_fg->nr_edges());
378 for(size_t iI=0; iI<_fg->nr_edges(); iI++) {
379 size_t i = _fg->edge(iI).first;
380 size_t I = _fg->edge(iI).second;
381 Prob prod(_fg->factor(I).p());
382 prod *= _adj_b_2_unnorm[I];
383 foreach(size_t j, _fg->nbF(I)) {
384 if(j!=i) { // for all j in I\i
385 Prob n_jI(_bp_dual->msgN(j,I));;
386 const BP_dual::_ind_t* ind = &(_bp_dual->index(j,I));
387 // multiply prod with n_jI
388 for(size_t x_I = 0; x_I < prod.size(); x_I++)
389 prod[x_I] *= n_jI[(*ind)[x_I]];
390 }
391 }
392 Prob marg( _fg->var(i).states(), 0.0 );
393 // ind is the precalculated Index(i,I) i.e. to x_I == k corresponds x_i == ind[k]
394 const BP_dual::_ind_t* ind = &(_bp_dual->index(i,I));
395 for( size_t r = 0; r < prod.size(); r++ )
396 marg[(*ind)[r]] += prod[r];
397
398 _new_adj_n.push_back(marg);
399 upMsgN(iI); // propagate new _new_adj_n to _adj_n and _adj_n_unnorm
400 }
401 _new_adj_m.clear();
402 _new_adj_m.reserve(_fg->nr_edges());
403 for(size_t iI=0; iI<_fg->nr_edges(); iI++) {
404 size_t i = _fg->edge(iI).first;
405 size_t I = _fg->edge(iI).second;
406 // Prob prod(_fg->var(i).states(),1.0);
407 Prob prod(_adj_b_1_unnorm[i]);
408 assert(prod.size()==_fg->var(i).states());
409 foreach(size_t J, _fg->nbV(i)) {
410 if(J!=I) { // for all J in i\I
411 prod *= _bp_dual->msgM(J,i);
412 }
413 }
414 _new_adj_m.push_back(prod);
415 upMsgM(iI); // propagate new _new_adj_m to _adj_m and _adj_m_unnorm
416 }
417 }
418
419 void BBP::Regenerate() {
420 RegenerateInds();
421 RegenerateT();
422 RegenerateU();
423 RegenerateS();
424 RegenerateR();
425 RegenerateInputs();
426 RegeneratePsiAdjoints();
427 RegenerateMessageAdjoints();
428 _iters = 0;
429 }
430
431 void BBP::calcNewN(size_t iI) {
432 size_t i=_fg->edge(iI).first;
433 size_t I=_fg->edge(iI).second;
434 _adj_psi_1[i] += T(i,I)*_adj_n_unnorm[iI];
435 _trip_end_t vfv_iI = _VFV[i][I];
436 Prob &new_adj_n_iI = _new_adj_n[iI];
437 new_adj_n_iI = Prob(_fg->var(i).states(),0.0);
438 foreach(size_t j, _fg->nbF(I)) {
439 if(j!=i) {
440 size_t iIj = vfv_iI[j];
441 Prob &p = _S[iIj];
442 size_t jI = _fg->VV2E(j,I);
443 LOOP_ij(
444 new_adj_n_iI[xi] += p[xij]*_adj_m_unnorm[jI][xj];
445 );
446 }
447 }
448 }
449
450 void BBP::calcNewM(size_t iI) {
451 size_t i=_fg->edge(iI).first;
452 size_t I=_fg->edge(iI).second;
453
454 Prob p(U(I,i));
455 Prob adj(_adj_m_unnorm[iI]);
456 const BP_dual::_ind_t* ind = &(_bp_dual->index(i,I));
457 for(size_t x_I = 0; x_I < p.size(); x_I++)
458 p[x_I] *= adj[(*ind)[x_I]];
459 _adj_psi_2[I] += p;
460
461 _new_adj_m[iI] = Prob(_fg->var(i).states(),0.0);
462 _trip_end_t fvf_Ii = _FVF[I][i];
463 foreach(size_t J, _fg->nbV(i)) {
464 if(J!=I) {
465 size_t iJ = _fg->VV2E(i,J);
466 _new_adj_m[iI] += _R[fvf_Ii[J]]*_adj_n_unnorm[iJ];
467 }
468 }
469 }
470
471 void BBP::upMsgM(size_t iI) {
472 _adj_m[iI] = _new_adj_m[iI];
473 _adj_m_unnorm[iI] = unnormAdjoint(_bp_dual->msgM(iI), _bp_dual->zM(iI), _adj_m[iI]);
474 }
475
476 void BBP::upMsgN(size_t iI) {
477 _adj_n[iI] = _new_adj_n[iI];
478 _adj_n_unnorm[iI] = unnormAdjoint(_bp_dual->msgN(iI), _bp_dual->zN(iI), _adj_n[iI]);
479 }
480
481 void BBP::doParUpdate() {
482 for(size_t iI=0; iI<_fg->nr_edges(); iI++) {
483 calcNewM(iI);
484 calcNewN(iI);
485 }
486 for(size_t iI=0; iI<_fg->nr_edges(); iI++) {
487 upMsgM(iI);
488 upMsgN(iI);
489 }
490 }
491
492 Real BBP::getUnMsgMag() {
493 Real s=0.0;
494 for(size_t iI=0; iI<_fg->nr_edges(); iI++) {
495 s += _adj_m_unnorm[iI].sumAbs();
496 s += _adj_n_unnorm[iI].sumAbs();
497 }
498 return s/_fg->nr_edges();
499 }
500
501 void BBP::getMsgMags(Real &s, Real &new_s) {
502 s=0.0; new_s=0.0;
503 for(size_t iI=0; iI<_fg->nr_edges(); iI++) {
504 s += _adj_m[iI].sumAbs();
505 s += _adj_n[iI].sumAbs();
506 new_s += _new_adj_m[iI].sumAbs();
507 new_s += _new_adj_n[iI].sumAbs();
508 }
509 Real t = _fg->nr_edges();
510 s /= t; new_s /= t;
511 }
512
513 /// run until change is less than given tolerance
514 void BBP::run(Real tol) {
515 size_t minIters = 2;
516 do {
517 _iters++;
518 doParUpdate();
519 } while((_iters < minIters || getUnMsgMag() > tol) && _iters < maxIter());
520 if(_iters==maxIter()) {
521 Real s, new_s;
522 getMsgMags(s,new_s);
523 cerr << "Warning: BBP didn't converge in " << _iters
524 << " iterations (unnorm message magnitude = " << getUnMsgMag()
525 << ", norm message mags = " << s << " -> " << new_s
526 << ")" << endl;
527 }
528 }
529
530 vector<size_t> getGibbsState(const BP_dual& fg, size_t iters) {
531 PropertySet gibbsProps;
532 gibbsProps.Set("iters", iters);
533 gibbsProps.Set("verbose", oneLess(fg.Verbose()));
534 Gibbs gibbs(fg, gibbsProps);
535 gibbs.run();
536 return gibbs.state();
537 }
538
539 void testUnnormAdjoint(int n);
540
541 /// given a state for each variable, use numerical derivatives
542 /// (multiplying a factor containing a variable by psi_1 adjustments)
543 /// to verify accuracy of _adj_psi_1.
544 /// 'h' controls size of perturbation.
545 /// 'bbpTol' controls tolerance of BBP run.
546 double numericBBPTest(const BP_dual& bp_dual, bbp_cfn_t cfn, double bbpTol, double h) {
547 // testUnnormAdjoint(4);
548
549 vector<size_t> state = getGibbsState(bp_dual,2*bp_dual.doneIters());
550 // calculate the value of the unperturbed cost function
551 Real cf0 = gibbsCostFn(bp_dual, cfn, &state);
552
553 // run BBP to estimate adjoints
554 BBP bbp(bp_dual);
555 gibbsInitBBPCostFnAdj(bbp, bp_dual, cfn, &state);
556 bbp.run(bbpTol);
557
558 Real d=0;
559
560 if(1) {
561 // verify bbp.adj_psi_1
562
563 // for each variable i
564 for(size_t i=0; i<bp_dual.nrVars(); i++) {
565 vector<double> adj_est;
566 // for each value xi
567 for(size_t xi=0; xi<bp_dual.var(i).states(); xi++) {
568 // create a copy of bp_dual which multiplies this into some factor containing the variable
569 BP_dual bp_dual_prb(bp_dual);
570 assert(bp_dual_prb.nbV(i).size()>0);
571
572 // create a perturbed psi_1[i] (there is no single-variable
573 // psi in libDAI so we initialize to all 1 and then multiply
574 // into nearest multivariate factor)
575 size_t n = bp_dual.var(i).states();
576 Prob psi_1_prb(n, 1.0);
577 psi_1_prb[xi] += h;
578 psi_1_prb.normalize();
579
580 // update bp_dual_prb
581 size_t I = bp_dual_prb.nbV(i)[0]; // use the first factor in list of neighbours of i
582 bp_dual_prb.factor(I) *= Factor(bp_dual.var(i), psi_1_prb);
583 bp_dual_prb.init(bp_dual.var(i)); // reset messages involving i
584 // bp_dual_prb.init();
585
586 // run the copy to convergence
587 bp_dual_prb.run();
588
589 // calculate the new value of the cost function
590 Real cf_prb = gibbsCostFn(bp_dual_prb, cfn, &state);
591
592 // use this to estimate the adjoint for i
593 // XXX why is it off by a factor of 2?
594 adj_est.push_back((cf_prb-cf0)/h);
595 }
596 Prob p_adj_est(adj_est.begin(), adj_est.end());
597 // compare this numerical estimate to the BBP estimate; sum the distances
598 cerr << "i: " << i
599 << ", p_adj_est: " << p_adj_est
600 << ", bbp.adj_psi_1(i): " << bbp.adj_psi_1(i) << endl;
601 d += dist(p_adj_est, bbp.adj_psi_1(i), Prob::DISTL1);
602 }
603 }
604 if(1) {
605 // verify bbp.adj_n and bbp.adj_m
606
607 // We actually want to check the responsiveness of objective
608 // function to changes in the final messages. But at the end of a
609 // BBP run, the message adjoints are for the initial messages (and
610 // they should be close to zero, see paper). So this resets the
611 // BBP adjoints to the refer to the desired final messages
612 bbp.RegenerateMessageAdjoints();
613
614 // for each variable i
615 for(size_t i=0; i<bp_dual.nrVars(); i++) {
616 // for each factor I ~ i
617 foreach(size_t I, bp_dual.nbV(i)) {
618 vector<double> adj_n_est;
619 // for each value xi
620 for(size_t xi=0; xi<bp_dual.var(i).states(); xi++) {
621 BP_dual bp_dual_prb(bp_dual);
622 // make h-sized change to newMsgN
623 bp_dual_prb.newMsgN(i,I)[xi] += h;
624 // recalculate beliefs
625 bp_dual_prb.CalcBeliefs();
626 // get cost function value
627 Real cf_prb = gibbsCostFn(bp_dual_prb, cfn, &state);
628 // add it to list of adjoints
629 adj_n_est.push_back((cf_prb-cf0)/h);
630 }
631
632 vector<double> adj_m_est;
633 // for each value xi
634 for(size_t xi=0; xi<bp_dual.var(i).states(); xi++) {
635 BP_dual bp_dual_prb(bp_dual);
636 // make h-sized change to newMsgM
637 bp_dual_prb.newMsgM(I,i)[xi] += h;
638 // recalculate beliefs
639 bp_dual_prb.CalcBeliefs();
640 // get cost function value
641 Real cf_prb = gibbsCostFn(bp_dual_prb, cfn, &state);
642 // add it to list of adjoints
643 adj_m_est.push_back((cf_prb-cf0)/h);
644 }
645
646 Prob p_adj_n_est(adj_n_est.begin(), adj_n_est.end());
647 // compare this numerical estimate to the BBP estimate; sum the distances
648 cerr << "i: " << i << ", I: " << I
649 << ", adj_n_est: " << p_adj_n_est
650 << ", bbp.adj_n(i,I): " << bbp.adj_n(i,I) << endl;
651 d += dist(p_adj_n_est, bbp.adj_n(i,I), Prob::DISTL1);
652
653 Prob p_adj_m_est(adj_m_est.begin(), adj_m_est.end());
654 // compare this numerical estimate to the BBP estimate; sum the distances
655 cerr << "i: " << i << ", I: " << I
656 << ", adj_m_est: " << p_adj_m_est
657 << ", bbp.adj_m(I,i): " << bbp.adj_m(I,i) << endl;
658 d += dist(p_adj_m_est, bbp.adj_m(I,i), Prob::DISTL1);
659 }
660 }
661 }
662 if(0) {
663 // verify bbp.adj_b_1
664 for(size_t i=0; i<bp_dual.nrVars(); i++) {
665 vector<double> adj_b_1_est;
666 // for each value xi
667 for(size_t xi=0; xi<bp_dual.var(i).states(); xi++) {
668 BP_dual bp_dual_prb(bp_dual);
669
670 // make h-sized change to b_1(i)[x_i]
671 bp_dual_prb._beliefs.b1[i][xi] += h;
672
673 // get cost function value
674 Real cf_prb = gibbsCostFn(bp_dual_prb, cfn, &state);
675
676 // add it to list of adjoints
677 adj_b_1_est.push_back((cf_prb-cf0)/h);
678 }
679 Prob p_adj_b_1_est(adj_b_1_est.begin(), adj_b_1_est.end());
680 // compare this numerical estimate to the BBP estimate; sum the distances
681 cerr << "i: " << i
682 << ", adj_b_1_est: " << p_adj_b_1_est
683 << ", bbp.adj_b_1(i): " << bbp.adj_b_1(i) << endl;
684 d += dist(p_adj_b_1_est, bbp.adj_b_1(i), Prob::DISTL1);
685 }
686 }
687
688 // return total of distances
689 return d;
690 }
691
692 void testUnnormAdjoint(int n) {
693 double h = 1.0e-5;
694 Prob q_unnorm(n);
695 // create a random q_unnorm
696 for(int i=0; i<n; i++) {
697 q_unnorm[i] = exp(4*rnd_stdnormal());
698 }
699 // normalize it to get q and Z_q
700 Prob q(q_unnorm);
701 double Z_q = q.normalize();
702 // cost function V just selects one (random) element of q
703 int vk = rnd_multi(n);
704 double V = q[vk];
705 // calculate adj_q for this V
706 Prob adj_q(n, 0.0);
707 adj_q[vk] = 1;
708 // use unnormAdjoint to get adj_q_unnorm
709 Prob adj_q_unnorm = unnormAdjoint(q, Z_q, adj_q);
710 // with perturbations of q_unnorm, test that adj_q_unnorm is correct
711 Prob adj_q_unnorm_numeric(n, 0.0);
712 for(int i=0; i<n; i++) {
713 Prob q_unnorm_prb = q_unnorm;
714 q_unnorm_prb[i] += h;
715 Prob q_prb(q_unnorm_prb);
716 q_prb.normalize();
717 DAI_PV(q_unnorm_prb);
718 DAI_PV(q_prb);
719 double V_prb = q_prb[vk];
720 adj_q_unnorm_numeric[i] = (V_prb-V)/h;
721 }
722 DAI_PV(q_unnorm);
723 DAI_PV(q);
724 DAI_PV(Z_q);
725 DAI_PV(vk);
726 DAI_PV(V);
727 DAI_PV(adj_q_unnorm);
728 DAI_PV(adj_q_unnorm_numeric);
729 }
730
731 void makeBBPGraph(const BP_dual& bp_dual, bbp_cfn_t cfn) {
732 double tiny=1.0e-7;
733 vector<size_t> state = getGibbsState(bp_dual,2*bp_dual.doneIters());
734 const char *graphs_dir = "./bbp-data";
735 mkdir(graphs_dir, -1);
736 size_t num_samples=500;
737 for(size_t i=0; i<bp_dual.nrVars(); i++) {
738 for(size_t xi=0; xi<bp_dual.var(i).states(); xi++) {
739 if(bp_dual.belief1(i)[xi]<tiny || abs(bp_dual.belief1(i)[xi]-1)<tiny)
740 continue;
741 char *fn;
742 asprintf(&fn,"%s/i:%d-xi:%d.out",graphs_dir,(int)i,(int)xi);
743 ofstream os(fn, ios_base::out|ios_base::trunc);
744
745 for(size_t n=0; n<num_samples; n++) {
746 double c=((double)n)/(num_samples-1);
747 Prob psi_1_prb(bp_dual.var(i).states(),1.0-c);
748 psi_1_prb[xi] = 1.0;
749
750 BP_dual bp_dual_prb(bp_dual);
751 assert(bp_dual_prb.nbV(i).size()>0);
752 size_t I = bp_dual_prb.nbV(i)[0]; // use the first factor in list of neighbours of i
753 bp_dual_prb.factor(I) *= Factor(bp_dual.var(i), psi_1_prb);
754 bp_dual_prb.init(bp_dual.var(i)); // reset messages involving i
755
756 // run the copy to convergence
757 bp_dual_prb.run();
758
759 Real cf = gibbsCostFn(bp_dual_prb, cfn, &state);
760
761 os << n << "\t" << cf << endl;
762 }
763
764 os.close();
765 free(fn);
766 }
767 }
768 }
769
770 } // end of namespace dai