1 // Math.cpp: implementation of the CMath class.
3 //////////////////////////////////////////////////////////////////////
6 #include "Mathmatics.h"
10 #include <sys/types.h>
18 //////////////////////////////////////////////////////////////////////
19 // Construction/Destruction
20 //////////////////////////////////////////////////////////////////////
23 //gene/math specific constants
25 #define MAX_LOG_TABLE_SIZE 10*1024*1024
26 #define LOG_TABLE_PRECISION 1e-6
28 #define MAX_LOG_TABLE_SIZE 123*1024*1024
29 #define LOG_TABLE_PRECISION 1e-15
32 INT
CMath::LOGACCURACY
= 0; // 100000 steps per integer
35 INT
CMath::LOGRANGE
= 0; // range for logtable: log(1+exp(x)) -25 <= x <= 0
38 const REAL
CMath::INFTY
= 1e11
; // infinity
40 const REAL
CMath::INFTY
= -log(0.0); // infinity
42 const REAL
CMath::ALMOST_NEG_INFTY
= -1000;
44 CHAR
CMath::rand_state
[256];
49 gettimeofday(&tv
, NULL
);
50 UINT seed
=(UINT
) (4223517*getpid()*tv
.tv_sec
*tv
.tv_usec
);
51 initstate(seed
, CMath::rand_state
, sizeof(CMath::rand_state
));
52 CIO::message(M_INFO
, "seeding random number generator with %u\n", seed
);
55 LOGRANGE
=CMath::determine_logrange();
56 LOGACCURACY
=CMath::determine_logaccuracy(LOGRANGE
);
57 CIO::message(M_INFO
, "Initializing log-table (size=%i*%i*%i=%2.1fMB) ...",LOGRANGE
,LOGACCURACY
,sizeof(REAL
),LOGRANGE
*LOGACCURACY
*sizeof(REAL
)/(1024.0*1024.0)) ;
59 CMath::logtable
=new REAL
[LOGRANGE
*LOGACCURACY
];
61 CIO::message(M_INFO
, "Done.\n") ;
64 while ((REAL
)log(1+((REAL
)exp(-REAL(i
)))))
66 CIO::message(M_INFO
, "determined range for x in log(1+exp(-x)) is:%d\n", i
);
79 INT
CMath::determine_logrange()
85 acc
=((REAL
)log(1+((REAL
)exp(-REAL(i
)))));
86 if (acc
<=(REAL
)LOG_TABLE_PRECISION
)
90 CIO::message(M_INFO
, "determined range for x in table log(1+exp(-x)) is:%d (error:%G)\n",i
,acc
);
94 INT
CMath::determine_logaccuracy(INT range
)
96 range
=MAX_LOG_TABLE_SIZE
/range
/((int)sizeof(REAL
));
97 CIO::message(M_INFO
, "determined accuracy for x in table log(1+exp(-x)) is:%d (error:%G)\n",range
,1.0/(double) range
);
101 //init log table of form log(1+exp(x))
102 void CMath::init_log_table()
104 for (INT i
=0; i
< LOGACCURACY
*LOGRANGE
; i
++)
105 logtable
[i
]=log(1+exp(REAL(-i
)/REAL(LOGACCURACY
)));
109 void CMath::sort(INT
*a
, INT cols
, INT sort_col
)
112 if (a
[0]==-1) return ;
116 while ((a
[(i
+1)*cols
]!=-1) && (a
[(i
+1)*cols
+1]!=-1)) // to be sure
118 if (a
[i
*cols
+sort_col
]>a
[(i
+1)*cols
+sort_col
])
120 for (INT j
=0; j
<cols
; j
++)
121 CMath::swap(a
[i
*cols
+j
],a
[(i
+1)*cols
+j
]) ;
129 void CMath::sort(REAL
*a
, INT
* idx
, INT N
)
136 for (INT i
=0; i
<N
-1; i
++)
141 swap(idx
[i
],idx
[i
+1]) ;
151 //plot x- axis false positives (fp) 1-Specificity
152 //plot y- axis true positives (tp) Sensitivity
153 INT
CMath::calcroc(REAL
* fp
, REAL
* tp
, REAL
* output
, INT
* label
, INT
& size
, INT
& possize
, INT
& negsize
, REAL
& tresh
, FILE* rocfile
)
159 for (i
=0; i
<size
; i
++)
161 if (!(label
[i
]== -1 || label
[i
]==1))
165 //sort data such that -1 labels first +1 behind
168 while ((label
[left
] < 0) && (left
<right
))
170 while ((label
[right
] > 0) && (left
<right
)) //warning: label must be either -1 or +1
173 swap(output
[left
],output
[right
]);
174 swap(label
[left
], label
[right
]);
181 REAL
* posout
=&output
[left
];
183 // sort the pos and neg outputs separately
184 qsort(negout
, negsize
);
185 qsort(posout
, possize
);
187 // set minimum+maximum values for decision-treshhold
188 REAL minimum
=min(negout
[0], posout
[0]);
189 REAL maximum
=minimum
;
191 maximum
=max(maximum
,negout
[negsize
-1]);
193 maximum
=max(maximum
,posout
[possize
-1]);
195 REAL treshhold
=minimum
;
196 REAL old_treshhold
=treshhold
;
199 for (i
=0; i
<size
; i
++)
205 //start with fp=1.0 tp=1.0 which is posidx=0, negidx=0
206 //everything right of {pos,neg}idx is considered to beLONG to +1
214 while (iteration
< size
&& treshhold
<=maximum
)
216 old_treshhold
=treshhold
;
218 while (treshhold
==old_treshhold
&& treshhold
<=maximum
)
220 if (posidx
<possize
&& negidx
<negsize
)
222 if (posout
[posidx
]<negout
[negidx
])
224 if (posout
[posidx
]==treshhold
)
227 treshhold
=posout
[posidx
];
231 if (negout
[negidx
]==treshhold
)
234 treshhold
=negout
[negidx
];
239 if (posidx
>=possize
&& negidx
<negsize
-1)
242 treshhold
=negout
[negidx
];
244 else if (negidx
>=negsize
&& posidx
<possize
-1)
247 treshhold
=posout
[posidx
];
249 else if (negidx
<negsize
&& treshhold
!=negout
[negidx
])
250 treshhold
=negout
[negidx
];
251 else if (posidx
<possize
&& treshhold
!=posout
[posidx
])
252 treshhold
=posout
[posidx
];
255 treshhold
=2*(maximum
+100); // force termination
264 tp
[iteration
]=(possize
-posidx
)/(REAL (possize
));
265 fp
[iteration
]=(negsize
-negidx
)/(REAL (negsize
));
267 //choose poINT with minimal error
268 if (minerr
> negsize
*fp
[iteration
]/size
+(1-tp
[iteration
])*possize
/size
)
270 minerr
=negsize
*fp
[iteration
]/size
+(1-tp
[iteration
])*possize
/size
;
271 tresh
=(old_treshhold
+treshhold
)/2;
283 const CHAR id
[]="ROC";
284 fwrite(id
, sizeof(char), sizeof(id
), rocfile
);
285 fwrite(fp
, sizeof(REAL
), size
, rocfile
);
286 fwrite(tp
, sizeof(REAL
), size
, rocfile
);
292 UINT
CMath::crc32(BYTE
*data
, INT len
)
300 for (i
=0; i
<len
; i
++)
305 if ((octet
>> 7) ^ (result
>> 31))
307 result
= (result
<< 1) ^ 0x04c11db7;
311 result
= (result
<< 1);
320 double CMath::mutual_info(REAL
* p1
, REAL
* p2
, INT len
)
324 for (INT i
=0; i
<len
; i
++)
325 for (INT j
=0; j
<len
; j
++)
326 e
+=exp(p2
[j
*len
+i
])*(p2
[j
*len
+i
]-p1
[i
]-p1
[j
]);
331 double CMath::relative_entropy(REAL
* p
, REAL
* q
, INT len
)
335 for (INT i
=0; i
<len
; i
++)
336 e
+=exp(p
[i
])*(p
[i
]-q
[i
]);
341 double CMath::entropy(REAL
* p
, INT len
)
345 for (INT i
=0; i
<len
; i
++)