+ removed load_genomic dependency
[qpalma.git] / dyn_prog / Mathmatics.h
1 #ifndef __MATHMATICS_H_
2 #define __MATHMATICS_H_
3
4 #include "common.h"
5 #include "io.h"
6
7 #include <math.h>
8 #include <stdio.h>
9
10 //define finite for win32
11 #ifdef _WIN32
12 #include <float.h>
13 #define finite _finite
14 #define isnan _isnan
15 #endif
16
17 #ifndef NAN
18 #include <stdlib.h>
19 #define NAN (strtod("NAN",NULL))
20 #endif
21
22 #ifdef SUNOS
23 extern "C" int finite(double);
24 #endif
25
26 /** Mathematical Functions.
27 * Class which collects generic mathematical functions
28 */
29 class CMath
30 {
31 public:
32 /**@name Constructor/Destructor.
33 */
34 //@{
35 ///Constructor - initializes log-table
36 CMath();
37
38 ///Destructor - frees logtable
39 virtual ~CMath();
40 //@}
41
42 /**@name min/max/abs functions.
43 */
44 //@{
45
46 ///return the minimum of two integers
47 //
48 template <class T>
49 static inline T min(T a, T b)
50 {
51 return ((a)<=(b))?(a):(b);
52 }
53
54 ///return the maximum of two integers
55 template <class T>
56 static inline T max(T a, T b)
57 {
58 return ((a)>=(b))?(a):(b);
59 }
60
61 ///return the maximum of two integers
62 template <class T>
63 static inline T abs(T a)
64 {
65 return ((a)>=(0))?(a):(-a);
66 }
67 //@}
68
69 /**@name misc functions */
70 //@{
71
72 /// crc32
73 static UINT crc32(BYTE *data, INT len);
74
75 static inline REAL round(REAL d)
76 {
77 return floor(d+0.5);
78 }
79
80 /// signum of type T variable a
81 template <class T>
82 static inline REAL sign(REAL a)
83 {
84 if (a==0)
85 return 0;
86 else return (a<0) ? (-1) : (+1);
87 }
88
89 /// swap floats a and b
90 template <class T>
91 static inline void swap(T & a,T &b)
92 {
93 T c=a ;
94 a=b; b=c ;
95 }
96
97 /// x^2
98 template <class T>
99 static inline T sq(T x)
100 {
101 return x*x;
102 }
103
104 static inline LONG factorial(INT n)
105 {
106 LONG res=1 ;
107 for (int i=2; i<=n; i++)
108 res*=i ;
109 return res ;
110 }
111
112 static inline LONG nchoosek(INT n, INT k)
113 {
114 long res=1 ;
115
116 for (INT i=n-k+1; i<=n; i++)
117 res*=i ;
118
119 return res/factorial(k) ;
120 }
121
122 /** performs a bubblesort on a given matrix a.
123 * it is sorted from in ascending order from top to bottom
124 * and left to right */
125 static void sort(INT *a, INT cols, INT sort_col=0) ;
126 static void sort(REAL *a, INT*idx, INT N) ;
127
128 /** performs a quicksort on an array output of length size
129 * it is sorted from in ascending (for type T) */
130 template <class T>
131 static void qsort(T* output, INT size) ;
132
133 /** performs a quicksort on an array output of length size
134 * it is sorted from in ascending order
135 * (for type T1) and returns the index (type T2)
136 * matlab alike [sorted,index]=sort(output)
137 */
138 template <class T1,class T2>
139 static void qsort(T1* output, T2* index, INT size);
140
141 /** performs a quicksort on an array output of length size
142 * it is sorted from in ascending order
143 * (for type T1) and returns the index (type T2)
144 * matlab alike [sorted,index]=sort(output)
145 */
146 template <class T1,class T2>
147 static void qsort_backward(T1* output, T2* index, INT size);
148
149 /* finds the smallest element in output and puts that element as the
150 first element */
151 template <class T>
152 static void min(REAL* output, T* index, INT size);
153
154 /* finds the n smallest elements in output and puts these elements as the
155 first n elements */
156 template <class T>
157 static void nmin(REAL* output, T* index, INT size, INT n);
158
159 /* performs a inplace unique of a vector of type T using quicksort
160 * returns the new number of elements */
161 template <class T>
162 static INT unique(T* output, INT size)
163 {
164 qsort(output, size) ;
165 INT i,j=0 ;
166 for (i=0; i<size; i++)
167 if (i==0 || output[i]!=output[i-1])
168 output[j++]=output[i] ;
169 return j ;
170 }
171
172 /* finds an element in a sorted array via binary search
173 * returns -1 if not found */
174 static inline INT fast_find(WORD* output, INT size, WORD elem)
175 {
176 INT start=0, end=size-1, middle=size/2 ;
177
178 if (output[start]>elem || output[end]<elem)
179 return -1 ;
180
181 while (1)
182 {
183 if (output[middle]>elem)
184 {
185 end = middle ;
186 middle=start+(end-start)/2 ;
187 } ;
188 if (output[middle]<elem)
189 {
190 start = middle ;
191 middle=start+(end-start)/2 ;
192 }
193 if (output[middle]==elem)
194 return middle ;
195 if (end-start<=1)
196 {
197 if (output[start]==elem)
198 return start ;
199 if (output[end]==elem)
200 return end ;
201 return -1 ;
202 }
203 }
204 }
205
206 /* finds an element in a sorted array via binary search
207 * that is smaller-equal elem und the next element is larger-equal */
208 static inline INT fast_find_range(REAL* output, INT size, REAL elem)
209 {
210 INT start=0, end=size-2, middle=(size-2)/2 ;
211
212 if (output[start]>elem)
213 return -1 ;
214 if (output[end]<=elem)
215 return size-1 ;
216
217 while (1)
218 {
219 if (output[middle]>elem)
220 {
221 end = middle ;
222 middle=start+(end-start)/2 ;
223 } ;
224 if (output[middle]<elem)
225 {
226 start = middle ;
227 middle=start+(end-start)/2 ;
228 }
229 if (output[middle]<=elem && output[middle+1]>=elem)
230 return middle ;
231 if (end-start<=1)
232 {
233 if (output[start]<=elem && output[start+1]>=elem)
234 return start ;
235 return end ;
236 }
237 }
238 }
239
240 /** calculates ROC into (fp,tp)
241 * from output and label of length size
242 * returns index with smallest error=fp+fn
243 */
244 static INT calcroc(REAL* fp, REAL* tp, REAL* output, INT* label, INT& size, INT& possize, INT& negsize, REAL& tresh, FILE* rocfile);
245 //@}
246
247 /// returns the mutual information of p which is given in logspace
248 /// where p,q are given in logspace
249 static double mutual_info(REAL* p1, REAL* p2, INT len);
250
251 /// returns the relative entropy H(P||Q),
252 /// where p,q are given in logspace
253 static double relative_entropy(REAL* p, REAL* q, INT len);
254
255 /// returns entropy of p which is given in logspace
256 static double entropy(REAL* p, INT len);
257
258 /**@name summing functions */
259 //@{
260 /** sum logarithmic probabilities.
261 * Probability measures are summed up but are now given in logspace where
262 * direct summation of exp(operand) is not possible due to numerical problems, i.e. eg. exp(-1000)=0. Therefore
263 * we do log( exp(a) + exp(b)) = a + log (1 + exp (b-a)) where a is max(p,q) and b min(p,q). */
264 #ifdef USE_LOGCACHE
265 static inline REAL logarithmic_sum(REAL p, REAL q)
266 {
267 if (finite(p))
268 {
269 if (finite(q))
270 {
271
272 register REAL diff=p-q;
273
274 if (diff>0) //p>q
275 {
276 if (diff > LOGRANGE)
277 return p;
278 else
279 return p + logtable[(int)(diff*LOGACCURACY)];
280 }
281 else //p<=q
282 {
283 if (-diff > LOGRANGE)
284 return q;
285 else
286 return q + logtable[(int)(-diff*LOGACCURACY)];
287 }
288 }
289 CIO::message("INVALID second operand to logsum(%f,%f) expect undefined results\n", p, q);
290 return NAN;
291 }
292 else
293 return q;
294 }
295
296 ///init log table of form log(1+exp(x))
297 static void init_log_table();
298
299 /// determine INT x for that log(1+exp(-x)) == 0
300 static INT determine_logrange();
301
302 /// determine accuracy, such that the thing fits into MAX_LOG_TABLE_SIZE, needs logrange as argument
303 static INT determine_logaccuracy(INT range);
304 #else
305 /*
306 inline REAL logarithmic_sum(REAL p, REAL q)
307 {
308 double result=comp_logarithmic_sum(p,q);
309
310 printf("diff:%f <-> %f\n",p-q, result);
311 return result;
312 }*/
313
314 static inline REAL logarithmic_sum(REAL p, REAL q)
315 {
316 if (finite(p))
317 {
318 if (finite(q))
319 {
320
321 register REAL diff=p-q;
322 if (diff>0) //p>q
323 {
324 if (diff > LOGRANGE)
325 return p;
326 else
327 return p + log(1+exp(-diff));
328 }
329 else //p<=q
330 {
331 if (-diff > LOGRANGE)
332 return q;
333 else
334 return q + log(1+exp(diff));
335 }
336 }
337 return p;
338 }
339 else
340 return q;
341 }
342 #endif
343 #ifdef LOG_SUM_ARRAY
344 /** sum up a whole array of values in logspace.
345 * This function addresses the numeric instabiliy caused by simply summing up N elements by adding
346 * each of the elements to some variable. Instead array neighbours are summed up until one element remains.
347 * Whilst the number of additions remains the same, the error is only in the order of log(N) instead N.
348 */
349 static inline REAL logarithmic_sum_array(REAL *p, INT len)
350 {
351 if (len<=2)
352 {
353 if (len==2)
354 return logarithmic_sum(p[0],p[1]) ;
355 if (len==1)
356 return p[0];
357 return -INFTY ;
358 }
359 else
360 {
361 register REAL *pp=p ;
362 if (len%2==1) pp++ ;
363 for (register INT j=0; j < len>>1; j++)
364 pp[j]=logarithmic_sum(pp[j<<1], pp[1+(j<<1)]) ;
365 }
366 return logarithmic_sum_array(p,len%2+len>>1) ;
367 }
368 #endif
369 //@}
370 public:
371 /**@name constants*/
372 //@{
373 /// infinity
374 static const REAL INFTY;
375
376 /// almost neg (log) infinity
377 static const REAL ALMOST_NEG_INFTY;
378
379 /// range for logtable: log(1+exp(x)) -LOGRANGE <= x <= 0
380 static INT LOGRANGE;
381
382 #ifdef USE_LOGCACHE
383 /// number of steps per integer
384 static INT LOGACCURACY;
385 //@}
386 protected:
387 ///table with log-values
388 static REAL* logtable;
389 #endif
390 static CHAR rand_state[256];
391 };
392
393
394 //implementations of template functions
395 template <class T>
396 void CMath::qsort(T* output, INT size)
397 {
398 if (size==2)
399 {
400 if (output[0] > output [1])
401 swap(output[0],output[1]);
402 }
403 else
404 {
405 REAL split=output[(size*rand())/(RAND_MAX+1)];
406 //REAL split=output[size/2];
407
408 INT left=0;
409 INT right=size-1;
410
411 while (left<=right)
412 {
413 while (output[left] < split)
414 left++;
415 while (output[right] > split)
416 right--;
417
418 if (left<=right)
419 {
420 swap(output[left],output[right]);
421 left++;
422 right--;
423 }
424 }
425
426 if (right+1> 1)
427 qsort(output,right+1);
428
429 if (size-left> 1)
430 qsort(&output[left],size-left);
431 }
432 }
433
434 template <class T1,class T2>
435 void CMath::qsort(T1* output, T2* index, INT size)
436 {
437 if (size==2)
438 {
439 if (output[0] > output [1]){
440 swap(output[0],output[1]);
441 swap(index[0],index[1]);
442 }
443
444 }
445 else
446 {
447 REAL split=output[(size*rand())/(RAND_MAX+1)];
448 //REAL split=output[size/2];
449
450 INT left=0;
451 INT right=size-1;
452
453 while (left<=right)
454 {
455 while (output[left] < split)
456 left++;
457 while (output[right] > split)
458 right--;
459
460 if (left<=right)
461 {
462 swap(output[left],output[right]);
463 swap(index[left],index[right]);
464 left++;
465 right--;
466 }
467 }
468
469 if (right+1> 1)
470 qsort(output,index,right+1);
471
472 if (size-left> 1)
473 qsort(&output[left],&index[left], size-left);
474 }
475 }
476
477 template <class T1,class T2>
478 void CMath::qsort_backward(T1* output, T2* index, INT size)
479 {
480 if (size==2)
481 {
482 if (output[0] < output [1]){
483 swap(output[0],output[1]);
484 swap(index[0],index[1]);
485 }
486
487 }
488 else
489 {
490 REAL split=output[(size*rand())/(RAND_MAX+1)];
491 //REAL split=output[size/2];
492
493 INT left=0;
494 INT right=size-1;
495
496 while (left<=right)
497 {
498 while (output[left] > split)
499 left++;
500 while (output[right] < split)
501 right--;
502
503 if (left<=right)
504 {
505 swap(output[left],output[right]);
506 swap(index[left],index[right]);
507 left++;
508 right--;
509 }
510 }
511
512 if (right+1> 1)
513 qsort(output,index,right+1);
514
515 if (size-left> 1)
516 qsort(&output[left],&index[left], size-left);
517 }
518 }
519
520 template <class T>
521 void CMath::nmin(REAL* output, T* index, INT size, INT n)
522 {
523 if (6*n*size<13*size*log((double)size))
524 for (INT i=0; i<n; i++)
525 min(&output[i], &index[i], size-i) ;
526 else
527 qsort(output, index, size) ;
528 }
529
530 template <class T>
531 void CMath::min(REAL* output, T* index, INT size)
532 {
533 if (size<=0)
534 return ;
535 REAL min_elem = output[0] ;
536 INT min_index = 0 ;
537 for (INT i=1; i<size; i++)
538 if (output[i]<min_elem)
539 {
540 min_index=i ;
541 min_elem=output[i] ;
542 }
543 swap(output[0], output[min_index]) ;
544 swap(index[0], index[min_index]) ;
545 }
546 #endif