aa85ab8e4da7633e8f3e3f18b9349ec5b8ef2f75
[libdai.git] / examples / example_imagesegmentation.cpp
1 /* This file is part of libDAI - http://www.libdai.org/
2 *
3 * libDAI is licensed under the terms of the GNU General Public License version
4 * 2, or (at your option) any later version. libDAI is distributed without any
5 * warranty. See the file COPYING for more details.
6 *
7 * Copyright (C) 2006-2010 Joris Mooij [joris dot mooij at libdai dot org]
8 * Copyright (C) 2006-2007 Radboud University Nijmegen, The Netherlands
9 */
10
11
12 #include <iostream>
13 #include <dai/alldai.h> // Include main libDAI header file
14 #include <CImg.h> // This example needs a recent version of CImg to be installed
15
16
17 using namespace dai;
18 using namespace std;
19 using namespace cimg_library;
20
21
22 /// Converts an image into a FactorGraph as described in section 1.1.3 of [\ref Moo08]
23 /** The grayscale pixel values of the image are transformed by a nonlinear transformation
24 into the local fields for each binary Ising variable, and the pairwise interactions
25 are uniform.
26 \param img The input image (should be grayscale, with 0 corresponding to background
27 and 255 corresponding to foreground)
28 \param J The pairwise interaction strength (should be positive for smoothing)
29 \param th_min Minimal value of local field strength (corresponding to background)
30 \param th_max Maximal value of local field strength (corresponding to foreground)
31 \param scale Typical difference in pixel values between fore- and background
32 \param pbg Probability that a pixel belongs to the background (in percent)
33 \param evidence The local evidence (represented as a grayscale image)
34 **/
35 template<class T>
36 FactorGraph img2fg( const CImg<T> &img, double J, double th_min, double th_max, double scale, double pbg, CImg<unsigned char> &evidence ) {
37 vector<Var> vars;
38 vector<Factor> factors;
39
40 size_t dimx = img.width(); // Width of the image in pixels
41 size_t dimy = img.height(); // Height of the image in pixels
42 size_t N = dimx * dimy; // One variable for each pixel
43
44 // Create variables
45 cout << " Image width: " << dimx << endl;
46 cout << " Image height: " << dimy << endl;
47 cout << " Pairwise interaction strength: " << J << endl;
48 cout << " Minimal local evidence strength: " << th_min << endl;
49 cout << " Maximal local evidence strength: " << th_max << endl;
50 cout << " Scale of pixel values: " << scale << endl;
51 cout << " Percentage of background: " << pbg << endl;
52 cout << " Creating " << N << " variables..." << endl;
53 // Reserve memory for the variables
54 vars.reserve( N );
55 // Create a binary variable for each pixel
56 for( size_t i = 0; i < N; i++ )
57 vars.push_back( Var( i, 2 ) );
58
59 // Build image histogram
60 CImg<float> hist = img.get_channel( 0 ).get_histogram( 256, 0, 255 );
61 size_t cum_hist = 0;
62
63 // Find the critical level which corresponds with the seperation
64 // between foreground and background, assuming that the percentage
65 // of pixels in the image that belong to the background is pbg
66 size_t level = 0;
67 for( level = 0; level < 256; level++ ) {
68 cum_hist += (size_t)hist(level);
69 if( cum_hist > pbg * dimx * dimy / 100.0 )
70 break;
71 }
72
73 // Create factors
74 cout << " Creating " << (3 * N - dimx - dimy) << " factors..." << endl;
75 // Reserve memory for the factors
76 factors.reserve( 3 * N - dimx - dimy );
77 // th_avg is the local field strength that would correspond with pixel value level
78 // th_width is the width of the local field strength range
79 double th_avg = (th_min + th_max) / 2.0;
80 double th_width = (th_max - th_min) / 2.0;
81 // For each pixel
82 for( size_t i = 0; i < dimx; i++ )
83 for( size_t j = 0; j < dimy; j++ ) {
84 // Add a pairwise interaction with the left neighboring pixel
85 if( i >= 1 )
86 factors.push_back( createFactorIsing( vars[i*dimy+j], vars[(i-1)*dimy+j], J ) );
87 // Add a pairwise interaction with the upper neighboring pixel
88 if( j >= 1 )
89 factors.push_back( createFactorIsing( vars[i*dimy+j], vars[i*dimy+(j-1)], J ) );
90 // Get the pixel value
91 double x = img(i,j);
92 // Apply the nonlinear transformation to get the local field strength
93 double th = th_avg + th_width * tanh( (x - level) / scale );
94 // Add a single-variable interaction with strength th
95 factors.push_back( createFactorIsing( vars[i*dimy+j], th ) );
96
97 // For visualization, we calculate a grayscale level corresponding to the local field strength
98 unsigned char g = (unsigned char)((tanh(th) + 1.0) / 2.0 * 255.0);
99 // and store it in the evidence image
100 if( g > 127 ) {
101 evidence(i,j,0) = 255;
102 evidence(i,j,1) = 2 * (g - 127);
103 evidence(i,j,2) = 2 * (g - 127);
104 } else {
105 evidence(i,j,0) = 0;
106 evidence(i,j,1) = 0;
107 evidence(i,j,2) = 2*g;
108 }
109 }
110
111 // Create the factor graph out of the variables and factors
112 cout << "Creating the factor graph..." << endl;
113 return FactorGraph( factors.begin(), factors.end(), vars.begin(), vars.end(), factors.size(), vars.size() );
114 }
115
116
117 /// Perform an inference algorithm on a factorgraph, and visualize intermediate beliefs
118 /** \param fg The factorgraph (should have only binary variables)
119 * \param algOpts String specifying the inference algorithm and its options in the format "NAME[key1=val1,key2=val2,...,keyn=valn]"
120 * \param maxIter Maximum number of iterations to perform
121 * \param tol Convergence tolerance
122 * \param m Final magnetizations
123 * \param dimx Image width
124 * \param dimy Image height
125 * \param disp Handle for displaying the intermediate beliefs
126 */
127 double doInference( FactorGraph& fg, string algOpts, size_t maxIter, double tol, vector<double> &m, size_t dimx, size_t dimy, CImgDisplay &disp ) {
128 // Construct inference algorithm
129 cout << "Inference algorithm: " << algOpts << endl;
130 cout << "Constructing inference algorithm object..." << endl;
131 InfAlg* ia = newInfAlgFromString( algOpts, fg );
132
133 // Initialize inference algorithm
134 cout << "Initializing inference algorithm..." << endl;
135 ia->init();
136
137 // Initialize vector for storing the magnetizations
138 m = vector<double>( fg.nrVars(), 0.0 );
139
140 // Construct an image that will hold the intermediate single-variable beliefs
141 CImg<unsigned char> image( dimx, dimy, 1, 3 );
142
143 // maxDiff stores the current convergence level
144 double maxDiff = 1.0;
145
146 // Iterate while maximum number of iterations has not been
147 // reached and requested convergence level has not been reached
148 cout << "Starting inference algorithm..." << endl;
149 for( size_t iter = 0; iter < maxIter && maxDiff > tol; iter++ ) {
150 // Set magnetizations to beliefs
151 for( size_t i = 0; i < fg.nrVars(); i++ )
152 m[i] = ia->beliefV(i)[1] - ia->beliefV(i)[0];
153
154 // For each pixel, calculate a color coded magnetization
155 // and store it in the image for visualization
156 for( size_t i = 0; i < dimx; i++ )
157 for( size_t j = 0; j < dimy; j++ ) {
158 unsigned char g = (unsigned char)((m[i*dimy+j] + 1.0) / 2.0 * 255.0);
159 if( g > 127 ) {
160 image(i,j,0) = 255;
161 image(i,j,1) = 2 * (g - 127);
162 image(i,j,2) = 2 * (g - 127);
163 } else {
164 image(i,j,0) = 0;
165 image(i,j,1) = 0;
166 image(i,j,2) = 2*g;
167 }
168 }
169
170 // Display the image with the current beliefs
171 disp = image;
172
173 // Perform the requested inference algorithm
174 // (which could be limited to perform only 1 iteration)
175 maxDiff = ia->run();
176
177 // Output progress
178 cout << " Iterations = " << iter << ", maxDiff = " << maxDiff << endl;
179 }
180 cout << "Finished inference algorithm" << endl;
181
182 // Clean up inference algorithm
183 delete ia;
184
185 // Return reached convergence level
186 return maxDiff;
187 }
188
189
190 /// Main program
191 /** This example shows how one can use approximate inference in factor graphs
192 * on a simple vision task: given two images, identify smooth regions where these
193 * two images differ more than some threshold. This can be used to seperate
194 * foreground from background if one image contains the background and the other
195 * one the combination of background and foreground.
196 */
197 int main(int argc,char **argv) {
198 cout << "This program is part of libDAI - http://www.libdai.org/" << endl;
199 cout << "(Use the option -h for getting help with the command line arguments.)" << endl;
200 // Display program usage, when invoked from the command line with option '-h'
201 cimg_usage( "This example shows how libDAI can be used for a simple image segmentation task" );
202 // Get command line arguments
203 const char* file_i1 = cimg_option( "-i1", "example_img_in1.jpg", "Input image 1" );
204 const char* file_i2 = cimg_option( "-i2", "example_img_in2.jpg", "Input image 2" );
205 const char* file_o1 = cimg_option( "-o1", "example_img_out1.jpg", "Output image (local evidence)" );
206 const char* file_o2 = cimg_option( "-o2", "example_img_out2.jpg", "Output image (magnetizations)" );
207 const double J = cimg_option( "-J", 2.4, "Pairwise interaction strength (i.e., smoothing strength)" );
208 const double th_min = cimg_option( "-thmin", -3.0, "Local evidence strength of background" );
209 const double th_max = cimg_option( "-thmax", 3.2, "Local evidence strength of foreground" );
210 const double scale = cimg_option( "-scale", 40.0, "Typical difference in pixel values between fore- and background" );
211 const double pbg = cimg_option( "-pbg", 90.0, "Percentage of background in image" );
212 const char *file_fg = cimg_option( "-fg", "", "Output factor graph" );
213
214 // Read input images
215 cout << endl;
216 cout << "Reading input image 1 (" << file_i1 << ")..." << endl;
217 CImg<unsigned char> image1 = CImg<>( file_i1 );
218 cout << "Reading input image 2 (" << file_i2 << ")..." << endl;
219 CImg<unsigned char> image2 = CImg<>( file_i2 );
220
221 // Check image sizes
222 if( (image1.width() != image2.width()) || (image1.height() != image2.height()) )
223 cerr << "Error: input images should have same size." << endl;
224 size_t dimx = image1.width();
225 size_t dimy = image1.height();
226
227 // Display input images
228 cout << "Displaying input image 1..." << endl;
229 CImgDisplay disp1( image1, "Input image 1", 0 );
230 cout << "Displaying input image 2..." << endl;
231 CImgDisplay disp2( image2, "Input image 2", 0 );
232
233 // Construct absolute difference image
234 cout << "Constructing difference image..." << endl;
235 CImg<int> image3( image1 );
236 image3 -= image2;
237 image3.abs();
238 // Normalize difference image
239 image3.norm( 1 ); // 1 = L1, 2 = L2, -1 = Linf
240
241 // Normalize the difference by the average value of the background image
242 for( size_t i = 0; i < dimx; i++ ) {
243 for( size_t j = 0; j < dimy; j++ ) {
244 int avg = 0;
245 for( int c = 0; c < image1.spectrum(); c++ )
246 avg += image1( i, j, c );
247 avg /= image1.spectrum();
248 image3( i, j, 0 ) /= (1.0 + avg / 255.0);
249 }
250 }
251 image3.normalize( 0, 255 );
252
253 // Display difference image
254 cout << "Displaying difference image..." << endl;
255 CImgDisplay disp3( image3, "Relative difference of both inputs", 0 );
256
257 // Convert difference image into a factor graph and store
258 // the local evidence in image4 for visualization
259 CImg<unsigned char> image4( dimx, dimy, 1, 3 );
260 cout << "Converting difference image into factor graph..." << endl;
261 FactorGraph fg = img2fg( image3, J, th_min, th_max, scale, pbg, image4 );
262
263 // Display local evidence
264 cout << "Displaying local evidence..." << endl;
265 CImgDisplay disp4( image4, "Local evidence", 0 );
266 cout << "Saving local evidence as JPEG in " << file_o1 << endl;
267 image4.save_jpeg( file_o1, 100 );
268 if( strlen( file_fg ) > 0 ) {
269 cout << "Saving factor graph as " << file_fg << endl;
270 fg.WriteToFile( file_fg );
271 }
272
273 // Solve the inference problem and visualize intermediate steps
274 CImgDisplay disp5( dimx, dimy, "Beliefs during inference", 0 );
275 vector<double> m; // Stores the final magnetizations
276 cout << "Solving the inference problem...please be patient!" << endl;
277 // doInference( fg, "BP[updates=SEQFIX,maxiter=1,tol=1e-9,verbose=0,logdomain=0]", 100, 1e-9, m, dimx, dimy, disp5 );
278 doInference( fg, "BP[updates=SEQMAX,maxiter=1,tol=1e-9,verbose=0,logdomain=0]", 100, 1e-9, m, dimx, dimy, disp5 );
279 // doInference( fg, "BP[updates=SEQRND,maxiter=1,tol=1e-9,verbose=0,logdomain=0]", 100, 1e-9, m, dimx, dimy, disp5 );
280 // doInference( fg, "HAK[doubleloop=0,clusters=LOOP,init=UNIFORM,loopdepth=4,tol=1e-9,maxiter=1,verbose=3]", 1000, 1e-5, m, dimx, dimy, disp5 );
281 // doInference( fg, "HAK[doubleloop=0,clusters=BETHE,init=UNIFORM,maxiter=1,tol=1e-9,verbose=3]", 1000, 1e-5, m, dimx, dimy, disp5 );
282 // doInference( fg, "MF[tol=1e-9,maxiter=1,damping=0.0,init=RANDOM,updates=HARDSPIN]", 1000, 1e-5, m, dimx, dimy, disp5 );
283 // doInference( fg, "TREEEP[type=ORG,tol=1e-9,maxiter=10000,verbose=3]", 1000, 1e-5, m, dimx, dimy, disp5 );
284 // doInference( fg, "GIBBS[iters=1,burnin=0]", 100, 1e-9, m, dimx, dimy, disp5 );
285 // doInference( fg, "GIBBS[iters=1,burnin=0]", 100, 1e-9, m, dimx, dimy, disp5 );
286 // doInference( fg, "GIBBS[iters=1,burnin=0]", 100, 1e-9, m, dimx, dimy, disp5 );
287
288 // Visualize the final magnetizations
289 for( size_t i = 0; i < dimx; i++ )
290 for( size_t j = 0; j < dimy; j++ ) {
291 unsigned char g = (unsigned char)((m[i*dimy+j] + 1.0) / 2.0 * 255.0);
292 if( g > 127 ) {
293 image4(i,j,0) = image2(i,j,0);
294 image4(i,j,1) = image2(i,j,1);
295 image4(i,j,2) = image2(i,j,2);
296 } else
297 for( size_t c = 0; c < (size_t)image4.spectrum(); c++ )
298 image4(i,j,c) = 255;
299 }
300 cout << "Displaying the final result of the segmentation problem..." << endl;
301 CImgDisplay main_disp( image4, "Foreground/background segmentation result", 0 );
302
303 cout << "Saving the final result of the segmentation problem as JPEG in " << file_o2 << endl;
304 image4.save_jpeg( file_o2, 100 );
305
306 cout << "Close the last image display in order to finish." << endl;
307 while( !main_disp.is_closed() )
308 cimg::wait( 40 );
309
310 return 0;
311 }