--- /dev/null
+SRCS= Mathmatics.cpp\
+ fill_matrix.cpp\
+ result_align.cpp\
+ penalty_info.cpp\
+ print_align.cpp\
+ io.cpp
+
+OBJS = $(SRCS:%.cpp=%.o)
+
+#CXXFLAGS=-O3 -fPIC
+#CXXFLAGS=-O3 -fPIC -pg -fprofile-arcs
+CXXFLAGS=-O3 -fPIC -ggdb
+
+IF=QPalmaDP
+
+all: $(OBJS)
+ swig -c++ -python ${IF}.i
+ g++ $(CXXFLAGS) -I/usr/include/python2.5 -c ${IF}_wrap.cxx -o ${IF}_wrap.o
+ g++ $(CXXFLAGS) -shared -lpython2.5 $(OBJS) ${IF}_wrap.o -o _${IF}.so
+ python -c "import ${IF}"
+
+clean:
+ rm *.o *.so *.cxx ${IF}.py ${IF}.pyc
+
--- /dev/null
+// Math.cpp: implementation of the CMath class.
+//
+//////////////////////////////////////////////////////////////////////
+
+
+#include "Mathmatics.h"
+#include "io.h"
+
+#include <sys/time.h>
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <time.h>
+#include <unistd.h>
+#include <assert.h>
+
+//////////////////////////////////////////////////////////////////////
+// Construction/Destruction
+//////////////////////////////////////////////////////////////////////
+
+#ifdef USE_LOGCACHE
+//gene/math specific constants
+#ifdef USE_HMMDEBUG
+#define MAX_LOG_TABLE_SIZE 10*1024*1024
+#define LOG_TABLE_PRECISION 1e-6
+#else
+#define MAX_LOG_TABLE_SIZE 123*1024*1024
+#define LOG_TABLE_PRECISION 1e-15
+#endif
+
+INT CMath::LOGACCURACY = 0; // 100000 steps per integer
+#endif
+
+INT CMath::LOGRANGE = 0; // range for logtable: log(1+exp(x)) -25 <= x <= 0
+
+#ifdef USE_PATHDEBUG
+const REAL CMath::INFTY = 1e11; // infinity
+#else
+const REAL CMath::INFTY = -log(0.0); // infinity
+#endif
+const REAL CMath::ALMOST_NEG_INFTY = -1000;
+
+CHAR CMath::rand_state[256];
+
+CMath::CMath()
+{
+ struct timeval tv;
+ gettimeofday(&tv, NULL);
+ UINT seed=(UINT) (4223517*getpid()*tv.tv_sec*tv.tv_usec);
+ initstate(seed, CMath::rand_state, sizeof(CMath::rand_state));
+ CIO::message(M_INFO, "seeding random number generator with %u\n", seed);
+
+#ifdef USE_LOGCACHE
+ LOGRANGE=CMath::determine_logrange();
+ LOGACCURACY=CMath::determine_logaccuracy(LOGRANGE);
+ CIO::message(M_INFO, "Initializing log-table (size=%i*%i*%i=%2.1fMB) ...",LOGRANGE,LOGACCURACY,sizeof(REAL),LOGRANGE*LOGACCURACY*sizeof(REAL)/(1024.0*1024.0)) ;
+
+ CMath::logtable=new REAL[LOGRANGE*LOGACCURACY];
+ init_log_table();
+ CIO::message(M_INFO, "Done.\n") ;
+#else
+ INT i=0;
+ while ((REAL)log(1+((REAL)exp(-REAL(i)))))
+ i++;
+ CIO::message(M_INFO, "determined range for x in log(1+exp(-x)) is:%d\n", i);
+ LOGRANGE=i;
+#endif
+}
+
+CMath::~CMath()
+{
+#ifdef USE_LOGCACHE
+ delete[] logtable;
+#endif
+}
+
+#ifdef USE_LOGCACHE
+INT CMath::determine_logrange()
+{
+ INT i;
+ REAL acc=0;
+ for (i=0; i<50; i++)
+ {
+ acc=((REAL)log(1+((REAL)exp(-REAL(i)))));
+ if (acc<=(REAL)LOG_TABLE_PRECISION)
+ break;
+ }
+
+ CIO::message(M_INFO, "determined range for x in table log(1+exp(-x)) is:%d (error:%G)\n",i,acc);
+ return i;
+}
+
+INT CMath::determine_logaccuracy(INT range)
+{
+ range=MAX_LOG_TABLE_SIZE/range/((int)sizeof(REAL));
+ CIO::message(M_INFO, "determined accuracy for x in table log(1+exp(-x)) is:%d (error:%G)\n",range,1.0/(double) range);
+ return range;
+}
+
+//init log table of form log(1+exp(x))
+void CMath::init_log_table()
+{
+ for (INT i=0; i< LOGACCURACY*LOGRANGE; i++)
+ logtable[i]=log(1+exp(REAL(-i)/REAL(LOGACCURACY)));
+}
+#endif
+
+void CMath::sort(INT *a, INT cols, INT sort_col)
+{
+ INT changed=1;
+ if (a[0]==-1) return ;
+ while (changed)
+ {
+ changed=0; INT i=0 ;
+ while ((a[(i+1)*cols]!=-1) && (a[(i+1)*cols+1]!=-1)) // to be sure
+ {
+ if (a[i*cols+sort_col]>a[(i+1)*cols+sort_col])
+ {
+ for (INT j=0; j<cols; j++)
+ CMath::swap(a[i*cols+j],a[(i+1)*cols+j]) ;
+ changed=1 ;
+ } ;
+ i++ ;
+ } ;
+ } ;
+}
+
+void CMath::sort(REAL *a, INT* idx, INT N)
+{
+
+ INT changed=1;
+ while (changed)
+ {
+ changed=0;
+ for (INT i=0; i<N-1; i++)
+ {
+ if (a[i]>a[i+1])
+ {
+ swap(a[i],a[i+1]) ;
+ swap(idx[i],idx[i+1]) ;
+ changed=1 ;
+ } ;
+ } ;
+ } ;
+
+}
+
+
+
+//plot x- axis false positives (fp) 1-Specificity
+//plot y- axis true positives (tp) Sensitivity
+INT CMath::calcroc(REAL* fp, REAL* tp, REAL* output, INT* label, INT& size, INT& possize, INT& negsize, REAL& tresh, FILE* rocfile)
+{
+ INT left=0;
+ INT right=size-1;
+ INT i;
+
+ for (i=0; i<size; i++)
+ {
+ if (!(label[i]== -1 || label[i]==1))
+ return -1;
+ }
+
+ //sort data such that -1 labels first +1 behind
+ while (left<right)
+ {
+ while ((label[left] < 0) && (left<right))
+ left++;
+ while ((label[right] > 0) && (left<right)) //warning: label must be either -1 or +1
+ right--;
+
+ swap(output[left],output[right]);
+ swap(label[left], label[right]);
+ }
+
+ // neg/pos sizes
+ negsize=left;
+ possize=size-left;
+ REAL* negout=output;
+ REAL* posout=&output[left];
+
+ // sort the pos and neg outputs separately
+ qsort(negout, negsize);
+ qsort(posout, possize);
+
+ // set minimum+maximum values for decision-treshhold
+ REAL minimum=min(negout[0], posout[0]);
+ REAL maximum=minimum;
+ if (negsize>0)
+ maximum=max(maximum,negout[negsize-1]);
+ if (possize>0)
+ maximum=max(maximum,posout[possize-1]);
+
+ REAL treshhold=minimum;
+ REAL old_treshhold=treshhold;
+
+ //clear array.
+ for (i=0; i<size; i++)
+ {
+ fp[i]=1.0;
+ tp[i]=1.0;
+ }
+
+ //start with fp=1.0 tp=1.0 which is posidx=0, negidx=0
+ //everything right of {pos,neg}idx is considered to beLONG to +1
+ INT posidx=0;
+ INT negidx=0;
+ INT iteration=1;
+ INT returnidx=-1;
+
+ REAL minerr=10;
+
+ while (iteration < size && treshhold<=maximum)
+ {
+ old_treshhold=treshhold;
+
+ while (treshhold==old_treshhold && treshhold<=maximum)
+ {
+ if (posidx<possize && negidx<negsize)
+ {
+ if (posout[posidx]<negout[negidx])
+ {
+ if (posout[posidx]==treshhold)
+ posidx++;
+ else
+ treshhold=posout[posidx];
+ }
+ else
+ {
+ if (negout[negidx]==treshhold)
+ negidx++;
+ else
+ treshhold=negout[negidx];
+ }
+ }
+ else
+ {
+ if (posidx>=possize && negidx<negsize-1)
+ {
+ negidx++;
+ treshhold=negout[negidx];
+ }
+ else if (negidx>=negsize && posidx<possize-1)
+ {
+ posidx++;
+ treshhold=posout[posidx];
+ }
+ else if (negidx<negsize && treshhold!=negout[negidx])
+ treshhold=negout[negidx];
+ else if (posidx<possize && treshhold!=posout[posidx])
+ treshhold=posout[posidx];
+ else
+ {
+ treshhold=2*(maximum+100); // force termination
+ posidx=possize;
+ negidx=negsize;
+ break;
+ }
+ }
+ }
+
+ //calc tp,fp
+ tp[iteration]=(possize-posidx)/(REAL (possize));
+ fp[iteration]=(negsize-negidx)/(REAL (negsize));
+
+ //choose poINT with minimal error
+ if (minerr > negsize*fp[iteration]/size+(1-tp[iteration])*possize/size )
+ {
+ minerr=negsize*fp[iteration]/size+(1-tp[iteration])*possize/size;
+ tresh=(old_treshhold+treshhold)/2;
+ returnidx=iteration;
+ }
+
+ iteration++;
+ }
+
+ // set new size
+ size=iteration;
+
+ if (rocfile)
+ {
+ const CHAR id[]="ROC";
+ fwrite(id, sizeof(char), sizeof(id), rocfile);
+ fwrite(fp, sizeof(REAL), size, rocfile);
+ fwrite(tp, sizeof(REAL), size, rocfile);
+ }
+
+ return returnidx;
+}
+
+UINT CMath::crc32(BYTE *data, INT len)
+{
+ UINT result;
+ INT i,j;
+ BYTE octet;
+
+ result = 0-1;
+
+ for (i=0; i<len; i++)
+ {
+ octet = *(data++);
+ for (j=0; j<8; j++)
+ {
+ if ((octet >> 7) ^ (result >> 31))
+ {
+ result = (result << 1) ^ 0x04c11db7;
+ }
+ else
+ {
+ result = (result << 1);
+ }
+ octet <<= 1;
+ }
+ }
+
+ return ~result;
+}
+
+double CMath::mutual_info(REAL* p1, REAL* p2, INT len)
+{
+ double e=0;
+
+ for (INT i=0; i<len; i++)
+ for (INT j=0; j<len; j++)
+ e+=exp(p2[j*len+i])*(p2[j*len+i]-p1[i]-p1[j]);
+
+ return e;
+}
+
+double CMath::relative_entropy(REAL* p, REAL* q, INT len)
+{
+ double e=0;
+
+ for (INT i=0; i<len; i++)
+ e+=exp(p[i])*(p[i]-q[i]);
+
+ return e;
+}
+
+double CMath::entropy(REAL* p, INT len)
+{
+ double e=0;
+
+ for (INT i=0; i<len; i++)
+ e-=exp(p[i])*p[i];
+
+ return e;
+}
--- /dev/null
+#ifndef __MATHMATICS_H_
+#define __MATHMATICS_H_
+
+#include "common.h"
+#include "io.h"
+
+#include <math.h>
+#include <stdio.h>
+
+//define finite for win32
+#ifdef _WIN32
+#include <float.h>
+#define finite _finite
+#define isnan _isnan
+#endif
+
+#ifndef NAN
+#include <stdlib.h>
+#define NAN (strtod("NAN",NULL))
+#endif
+
+#ifdef SUNOS
+extern "C" int finite(double);
+#endif
+
+/** Mathematical Functions.
+ * Class which collects generic mathematical functions
+ */
+class CMath
+{
+public:
+ /**@name Constructor/Destructor.
+ */
+ //@{
+ ///Constructor - initializes log-table
+ CMath();
+
+ ///Destructor - frees logtable
+ virtual ~CMath();
+ //@}
+
+ /**@name min/max/abs functions.
+ */
+ //@{
+
+ ///return the minimum of two integers
+ //
+ template <class T>
+ static inline T min(T a, T b)
+ {
+ return ((a)<=(b))?(a):(b);
+ }
+
+ ///return the maximum of two integers
+ template <class T>
+ static inline T max(T a, T b)
+ {
+ return ((a)>=(b))?(a):(b);
+ }
+
+ ///return the maximum of two integers
+ template <class T>
+ static inline T abs(T a)
+ {
+ return ((a)>=(0))?(a):(-a);
+ }
+ //@}
+
+ /**@name misc functions */
+ //@{
+
+ /// crc32
+ static UINT crc32(BYTE *data, INT len);
+
+ static inline REAL round(REAL d)
+ {
+ return floor(d+0.5);
+ }
+
+ /// signum of type T variable a
+ template <class T>
+ static inline REAL sign(REAL a)
+ {
+ if (a==0)
+ return 0;
+ else return (a<0) ? (-1) : (+1);
+ }
+
+ /// swap floats a and b
+ template <class T>
+ static inline void swap(T & a,T &b)
+ {
+ T c=a ;
+ a=b; b=c ;
+ }
+
+ /// x^2
+ template <class T>
+ static inline T sq(T x)
+ {
+ return x*x;
+ }
+
+ static inline LONG factorial(INT n)
+ {
+ LONG res=1 ;
+ for (int i=2; i<=n; i++)
+ res*=i ;
+ return res ;
+ }
+
+ static inline LONG nchoosek(INT n, INT k)
+ {
+ long res=1 ;
+
+ for (INT i=n-k+1; i<=n; i++)
+ res*=i ;
+
+ return res/factorial(k) ;
+ }
+
+ /** performs a bubblesort on a given matrix a.
+ * it is sorted from in ascending order from top to bottom
+ * and left to right */
+ static void sort(INT *a, INT cols, INT sort_col=0) ;
+ static void sort(REAL *a, INT*idx, INT N) ;
+
+ /** performs a quicksort on an array output of length size
+ * it is sorted from in ascending (for type T) */
+ template <class T>
+ static void qsort(T* output, INT size) ;
+
+ /** performs a quicksort on an array output of length size
+ * it is sorted from in ascending order
+ * (for type T1) and returns the index (type T2)
+ * matlab alike [sorted,index]=sort(output)
+ */
+ template <class T1,class T2>
+ static void qsort(T1* output, T2* index, INT size);
+
+ /** performs a quicksort on an array output of length size
+ * it is sorted from in ascending order
+ * (for type T1) and returns the index (type T2)
+ * matlab alike [sorted,index]=sort(output)
+ */
+ template <class T1,class T2>
+ static void qsort_backward(T1* output, T2* index, INT size);
+
+ /* finds the smallest element in output and puts that element as the
+ first element */
+ template <class T>
+ static void min(REAL* output, T* index, INT size);
+
+ /* finds the n smallest elements in output and puts these elements as the
+ first n elements */
+ template <class T>
+ static void nmin(REAL* output, T* index, INT size, INT n);
+
+ /* performs a inplace unique of a vector of type T using quicksort
+ * returns the new number of elements */
+ template <class T>
+ static INT unique(T* output, INT size)
+ {
+ qsort(output, size) ;
+ INT i,j=0 ;
+ for (i=0; i<size; i++)
+ if (i==0 || output[i]!=output[i-1])
+ output[j++]=output[i] ;
+ return j ;
+ }
+
+ /* finds an element in a sorted array via binary search
+ * returns -1 if not found */
+ static inline INT fast_find(WORD* output, INT size, WORD elem)
+ {
+ INT start=0, end=size-1, middle=size/2 ;
+
+ if (output[start]>elem || output[end]<elem)
+ return -1 ;
+
+ while (1)
+ {
+ if (output[middle]>elem)
+ {
+ end = middle ;
+ middle=start+(end-start)/2 ;
+ } ;
+ if (output[middle]<elem)
+ {
+ start = middle ;
+ middle=start+(end-start)/2 ;
+ }
+ if (output[middle]==elem)
+ return middle ;
+ if (end-start<=1)
+ {
+ if (output[start]==elem)
+ return start ;
+ if (output[end]==elem)
+ return end ;
+ return -1 ;
+ }
+ }
+ }
+
+ /* finds an element in a sorted array via binary search
+ * that is smaller-equal elem und the next element is larger-equal */
+ static inline INT fast_find_range(REAL* output, INT size, REAL elem)
+ {
+ INT start=0, end=size-2, middle=(size-2)/2 ;
+
+ if (output[start]>elem)
+ return -1 ;
+ if (output[end]<=elem)
+ return size-1 ;
+
+ while (1)
+ {
+ if (output[middle]>elem)
+ {
+ end = middle ;
+ middle=start+(end-start)/2 ;
+ } ;
+ if (output[middle]<elem)
+ {
+ start = middle ;
+ middle=start+(end-start)/2 ;
+ }
+ if (output[middle]<=elem && output[middle+1]>=elem)
+ return middle ;
+ if (end-start<=1)
+ {
+ if (output[start]<=elem && output[start+1]>=elem)
+ return start ;
+ return end ;
+ }
+ }
+ }
+
+ /** calculates ROC into (fp,tp)
+ * from output and label of length size
+ * returns index with smallest error=fp+fn
+ */
+ static INT calcroc(REAL* fp, REAL* tp, REAL* output, INT* label, INT& size, INT& possize, INT& negsize, REAL& tresh, FILE* rocfile);
+ //@}
+
+ /// returns the mutual information of p which is given in logspace
+ /// where p,q are given in logspace
+ static double mutual_info(REAL* p1, REAL* p2, INT len);
+
+ /// returns the relative entropy H(P||Q),
+ /// where p,q are given in logspace
+ static double relative_entropy(REAL* p, REAL* q, INT len);
+
+ /// returns entropy of p which is given in logspace
+ static double entropy(REAL* p, INT len);
+
+ /**@name summing functions */
+ //@{
+ /** sum logarithmic probabilities.
+ * Probability measures are summed up but are now given in logspace where
+ * direct summation of exp(operand) is not possible due to numerical problems, i.e. eg. exp(-1000)=0. Therefore
+ * we do log( exp(a) + exp(b)) = a + log (1 + exp (b-a)) where a is max(p,q) and b min(p,q). */
+#ifdef USE_LOGCACHE
+ static inline REAL logarithmic_sum(REAL p, REAL q)
+ {
+ if (finite(p))
+ {
+ if (finite(q))
+ {
+
+ register REAL diff=p-q;
+
+ if (diff>0) //p>q
+ {
+ if (diff > LOGRANGE)
+ return p;
+ else
+ return p + logtable[(int)(diff*LOGACCURACY)];
+ }
+ else //p<=q
+ {
+ if (-diff > LOGRANGE)
+ return q;
+ else
+ return q + logtable[(int)(-diff*LOGACCURACY)];
+ }
+ }
+ CIO::message("INVALID second operand to logsum(%f,%f) expect undefined results\n", p, q);
+ return NAN;
+ }
+ else
+ return q;
+ }
+
+ ///init log table of form log(1+exp(x))
+ static void init_log_table();
+
+ /// determine INT x for that log(1+exp(-x)) == 0
+ static INT determine_logrange();
+
+ /// determine accuracy, such that the thing fits into MAX_LOG_TABLE_SIZE, needs logrange as argument
+ static INT determine_logaccuracy(INT range);
+#else
+ /*
+ inline REAL logarithmic_sum(REAL p, REAL q)
+ {
+ double result=comp_logarithmic_sum(p,q);
+
+ printf("diff:%f <-> %f\n",p-q, result);
+ return result;
+ }*/
+
+ static inline REAL logarithmic_sum(REAL p, REAL q)
+ {
+ if (finite(p))
+ {
+ if (finite(q))
+ {
+
+ register REAL diff=p-q;
+ if (diff>0) //p>q
+ {
+ if (diff > LOGRANGE)
+ return p;
+ else
+ return p + log(1+exp(-diff));
+ }
+ else //p<=q
+ {
+ if (-diff > LOGRANGE)
+ return q;
+ else
+ return q + log(1+exp(diff));
+ }
+ }
+ return p;
+ }
+ else
+ return q;
+ }
+#endif
+#ifdef LOG_SUM_ARRAY
+ /** sum up a whole array of values in logspace.
+ * This function addresses the numeric instabiliy caused by simply summing up N elements by adding
+ * each of the elements to some variable. Instead array neighbours are summed up until one element remains.
+ * Whilst the number of additions remains the same, the error is only in the order of log(N) instead N.
+ */
+ static inline REAL logarithmic_sum_array(REAL *p, INT len)
+ {
+ if (len<=2)
+ {
+ if (len==2)
+ return logarithmic_sum(p[0],p[1]) ;
+ if (len==1)
+ return p[0];
+ return -INFTY ;
+ }
+ else
+ {
+ register REAL *pp=p ;
+ if (len%2==1) pp++ ;
+ for (register INT j=0; j < len>>1; j++)
+ pp[j]=logarithmic_sum(pp[j<<1], pp[1+(j<<1)]) ;
+ }
+ return logarithmic_sum_array(p,len%2+len>>1) ;
+ }
+#endif
+ //@}
+public:
+ /**@name constants*/
+ //@{
+ /// infinity
+ static const REAL INFTY;
+
+ /// almost neg (log) infinity
+ static const REAL ALMOST_NEG_INFTY;
+
+ /// range for logtable: log(1+exp(x)) -LOGRANGE <= x <= 0
+ static INT LOGRANGE;
+
+#ifdef USE_LOGCACHE
+ /// number of steps per integer
+ static INT LOGACCURACY;
+ //@}
+protected:
+ ///table with log-values
+ static REAL* logtable;
+#endif
+ static CHAR rand_state[256];
+};
+
+
+//implementations of template functions
+template <class T>
+void CMath::qsort(T* output, INT size)
+{
+ if (size==2)
+ {
+ if (output[0] > output [1])
+ swap(output[0],output[1]);
+ }
+ else
+ {
+ REAL split=output[(size*rand())/(RAND_MAX+1)];
+ //REAL split=output[size/2];
+
+ INT left=0;
+ INT right=size-1;
+
+ while (left<=right)
+ {
+ while (output[left] < split)
+ left++;
+ while (output[right] > split)
+ right--;
+
+ if (left<=right)
+ {
+ swap(output[left],output[right]);
+ left++;
+ right--;
+ }
+ }
+
+ if (right+1> 1)
+ qsort(output,right+1);
+
+ if (size-left> 1)
+ qsort(&output[left],size-left);
+ }
+}
+
+template <class T1,class T2>
+void CMath::qsort(T1* output, T2* index, INT size)
+{
+ if (size==2)
+ {
+ if (output[0] > output [1]){
+ swap(output[0],output[1]);
+ swap(index[0],index[1]);
+ }
+
+ }
+ else
+ {
+ REAL split=output[(size*rand())/(RAND_MAX+1)];
+ //REAL split=output[size/2];
+
+ INT left=0;
+ INT right=size-1;
+
+ while (left<=right)
+ {
+ while (output[left] < split)
+ left++;
+ while (output[right] > split)
+ right--;
+
+ if (left<=right)
+ {
+ swap(output[left],output[right]);
+ swap(index[left],index[right]);
+ left++;
+ right--;
+ }
+ }
+
+ if (right+1> 1)
+ qsort(output,index,right+1);
+
+ if (size-left> 1)
+ qsort(&output[left],&index[left], size-left);
+ }
+}
+
+template <class T1,class T2>
+void CMath::qsort_backward(T1* output, T2* index, INT size)
+{
+ if (size==2)
+ {
+ if (output[0] < output [1]){
+ swap(output[0],output[1]);
+ swap(index[0],index[1]);
+ }
+
+ }
+ else
+ {
+ REAL split=output[(size*rand())/(RAND_MAX+1)];
+ //REAL split=output[size/2];
+
+ INT left=0;
+ INT right=size-1;
+
+ while (left<=right)
+ {
+ while (output[left] > split)
+ left++;
+ while (output[right] < split)
+ right--;
+
+ if (left<=right)
+ {
+ swap(output[left],output[right]);
+ swap(index[left],index[right]);
+ left++;
+ right--;
+ }
+ }
+
+ if (right+1> 1)
+ qsort(output,index,right+1);
+
+ if (size-left> 1)
+ qsort(&output[left],&index[left], size-left);
+ }
+}
+
+template <class T>
+void CMath::nmin(REAL* output, T* index, INT size, INT n)
+{
+ if (6*n*size<13*size*log((double)size))
+ for (INT i=0; i<n; i++)
+ min(&output[i], &index[i], size-i) ;
+ else
+ qsort(output, index, size) ;
+}
+
+template <class T>
+void CMath::min(REAL* output, T* index, INT size)
+{
+ if (size<=0)
+ return ;
+ REAL min_elem = output[0] ;
+ INT min_index = 0 ;
+ for (INT i=1; i<size; i++)
+ if (output[i]<min_elem)
+ {
+ min_index=i ;
+ min_elem=output[i] ;
+ }
+ swap(output[0], output[min_index]) ;
+ swap(index[0], index[min_index]) ;
+}
+#endif
--- /dev/null
+%module QPalmaDP
+%{
+#include "qpalma_dp.h"
+%}
+
+%include "std_vector.i"
+%include "std_string.i"
+
+%include "carrays.i"
+
+%include "qpalma_dp.h"
+
+
+%array_functions(int, intArray) ;
+%array_functions(double, doubleArray) ;
+%array_class(Pre_score, Pre_scoreArray) ;
+
+%pythoncode
+%{
+
+def createDoubleArrayFromList(list):
+ array = new_doubleArray(len(list))
+ for i in range(len(list)):
+ doubleArray_setitem(array, i, list[i])
+ return array
+
+def createIntArrayFromList(list):
+ array = new_intArray(len(list))
+ for i in range(len(list)):
+ intArray_setitem(array, i, list[i])
+ return array
+
+def createListFromIntArray(array, array_len):
+ list = [0]*array_len
+ for i in range(array_len):
+ list[i] = intArray_getitem(array,i)
+ return list
+
+def createListFromDoubleArray(array, array_len):
+ list = [0]*array_len
+ for i in range(array_len):
+ list[i] = doubleArray_getitem(array,i)
+ return list
+
+%}
+
--- /dev/null
+#ifndef __COMMON_H__
+#define __COMMON_H__
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "config.h"
+
+#ifdef SUNOS
+#define bool int
+#define false 0
+#define true 1
+#endif
+
+#ifndef LINUX
+#define RANDOM_MAX 2147483647
+#else
+#define RANDOM_MAX RAND_MAX
+#endif
+
+/**@name Standard Types
+ * Definition of Platform independent Types
+*/
+//@{
+
+/// Type CHAR
+typedef char CHAR;
+typedef CHAR* P_CHAR;
+
+/// Type BYTE
+typedef unsigned char BYTE;
+typedef BYTE* P_BYTE;
+
+/// Type SHORT is 2 bytes in size
+typedef short int SHORT;
+typedef SHORT* P_SHORT;
+
+/// Type WORD is 2 bytes in size
+typedef unsigned short int WORD;
+typedef WORD* P_WORD;
+
+/// Type INT is 4 bytes in size
+typedef int INT;
+typedef INT* P_INT;
+
+/// Type INT is 4 bytes in size
+typedef unsigned int UINT;
+typedef UINT* P_UINT;
+
+/// Type LONG is 8 bytes in size
+typedef long LONG;
+typedef LONG* P_LONG;
+
+/// Type SHORTREAL is 4 bytes in size
+typedef float SHORTREAL;
+typedef SHORTREAL* P_SHORTREAL;
+
+/// Type REAL is 8 bytes in size
+typedef double REAL;
+typedef REAL* P_REAL;
+
+/// Type LONGREAL is 16 bytes in size
+//typedef long double LONGREAL;
+//typedef LONGREAL* P_LONGREAL;
+
+#ifdef USE_SHORTREAL_KERNELCACHE
+ typedef SHORTREAL KERNELCACHE_ELEM;
+#else
+ typedef REAL KERNELCACHE_ELEM;
+#endif
+
+typedef KERNELCACHE_ELEM P_KERNELCACHE_ELEM;
+
+typedef LONG KERNELCACHE_IDX;
+
+/// The io libs output [DEBUG] etc in front of every CIO::message
+/// 'higher' messages filter output depending on the loglevel, i.e. CRITICAL messages
+/// will print all M_CRITICAL TO M_EMERGENCY messages to
+enum EMessageType
+{
+ M_DEBUG,
+ M_INFO,
+ M_NOTICE,
+ M_WARN,
+ M_ERROR,
+ M_CRITICAL,
+ M_ALERT,
+ M_EMERGENCY,
+ M_MESSAGEONLY,
+ M_PROGRESS
+};
+
+enum EKernelType
+{
+ K_UNKNOWN = 0,
+ K_OPTIMIZABLE = 4096,
+ K_LINEAR = 10 | K_OPTIMIZABLE,
+ K_POLY = 20,
+ K_GAUSSIAN = 30,
+ K_HISTOGRAM = 40,
+ K_SALZBERG = 41,
+ K_LOCALITYIMPROVED = 50,
+ K_SIMPLELOCALITYIMPROVED = 60,
+ K_FIXEDDEGREE = 70,
+ K_WEIGHTEDDEGREE = 80 | K_OPTIMIZABLE,
+ K_WEIGHTEDDEGREEPOS = 81 | K_OPTIMIZABLE,
+ K_WEIGHTEDDEGREEPOLYA = 82,
+ K_WD = 83,
+ K_COMMWORD = 90 | K_OPTIMIZABLE ,
+ K_POLYMATCH = 100,
+ K_ALIGNMENT = 110,
+ K_COMMWORDSTRING = 120 | K_OPTIMIZABLE,
+ K_SPARSENORMSQUARED = 130,
+ K_COMBINED = 140 | K_OPTIMIZABLE
+};
+
+enum EFeatureType
+{
+ F_UNKNOWN = 0,
+ F_REAL = 10,
+ F_SHORT = 20,
+ F_CHAR = 30,
+ F_INT = 40,
+ F_BYTE = 50,
+ F_WORD = 60
+};
+
+enum EFeatureClass
+{
+ C_UNKNOWN = 0,
+ C_SIMPLE = 10,
+ C_SPARSE = 20,
+ C_STRING = 30,
+ C_COMBINED = 40
+};
+
+/// Alphabet of charfeatures/observations
+enum E_ALPHABET
+{
+ /// DNA - letters A,C,G,T,*,N,n
+ DNA=0,
+
+ /// PROTEIN - letters a-z
+ PROTEIN=1,
+
+ /// ALPHANUM - [0-9a-z]
+ ALPHANUM=2,
+
+ /// CUBE - [1-6]
+ CUBE=3,
+
+ /// NONE - type has no alphabet
+ NONE=4
+};
+
+//@}
+
+#define TMP_DIR "/tmp/"
+//#define TMP_DIR "/short/x46/tmp/"
+
+#endif
--- /dev/null
+
+#define HAVE_MATLAB 1
+
+#define HAVE_LARGEFILE 1
+
+
+
+
+
+
+#define USE_BIGSTATES 1
+
+
+#define USE_HMMCACHE 1
+
+
+
+
+
--- /dev/null
+/*********************************************************************************************/
+// Version of fill_matrix with fills several alignment matrices (best, second_best ...)
+// nr_paths det. number of these matrices
+//
+// est_len: Number of columns in alignment matrix: length(EST) +1 (for the gap) (variable: i)
+// dna_len: Number of lines in alignment matrix: length(DNA) +1 (for the gap) (variable: j)
+//
+// !!! IMPORTANT !!! The pictures and equations in the paper/poster etc. describe the algorithm
+// for an alignment matrix where DNA and EST changed places !!! IMPORTANT !!!
+/*********************************************************************************************/
+
+/*
+ matchmatrix: columnwise
+ - A C G T N (dna)
+ - x x x x x x
+ A x x x x x x
+ C x x x x x x
+ G x x x x x x
+ T x x x x x x
+ N x x x x x x
+(est)
+*/
+
+/*
+ alignment matrix: columnwise
+ |-EST . . .
+ -+------------
+ -|00 ...
+ D|0.
+ N|. .
+ A|. .
+ .|.
+ .|
+ .|
+*/
+
+
+/* TODO
+ - max_len = 10000 for elegans!
+ - do not use double
+
+*/
+
+
+#include "fill_matrix.h"
+
+using namespace std;
+
+int check_char(char base)
+{
+ switch(base)
+ {
+ case 'A':
+ case 'a': return 1;
+ case 'C':
+ case 'c': return 2;
+ case 'G':
+ case 'g': return 3;
+ case 'T':
+ case 't': return 4;
+ case 'N':
+ case 'n': return 5;
+ default : return 0;
+ }
+
+}
+
+
+
+void fill_matrix(int nr_paths, Pre_score* matrices[], int est_len /*counter var: i*/, int dna_len /*counter var: j*/, char* est, char* dna, penalty_struct* functions , double* matchmatrix, double* donor, double* acceptor, bool remove_duplicate_scores, int nr_donor_sites, int* donor_sites, int* max_score_positions)
+{
+
+ /*********************************************************************************************/
+ // variables
+ /*********************************************************************************************/
+ const int mlen = 6; // length of matchmatrix
+ int estChar, dnaChar; // 0:'-', 1:'a', 2:'c', 3:'g', 4:'t', 5:'n'
+ double prevValue ;
+ double tempValue;
+ int max_len = (int)functions->limits[functions->len-1] ; //last (len) supporting point of h
+ //printf("max_len: %d\n", max_len) ;
+
+ int num_elem ; // counter variable
+
+ const int max_num_elem = (4+nr_paths+max_len) * nr_paths ; // upper bound on num_elem
+ double *scores=new double[max_num_elem] ; //are sorted later
+ struct ArrayElem *array=new ArrayElem[max_num_elem] ;
+ //printf("%d\n", max_num_elem*sizeof(ArrayElem)) ;
+
+ int max_ss_element[nr_paths*nr_paths]; //position on dna with max ss score for introns of length greater than max_len
+ int splice_pos ; //splice position get from donor_sites
+ int start_splice_pos ; //where to start looking in donor_sites
+
+ double max_scores[nr_paths] ;
+
+ /*********************************************************************************************/
+ // Initialisation of all alignment matrices, columnwise
+ /*********************************************************************************************/
+ for (int z=0; z<nr_paths; z++) /* matrice z */
+ {
+ max_scores[z] = log(0.0) ;
+ max_score_positions[2*z] = 0; // pos i
+ max_score_positions[2*z +1] = 0; // pos j
+
+ for (int i=0; i<est_len; i++) /* i.column */
+ {
+ for (int j=0; j<dna_len; j++) /* j.line */
+ {
+ ((Pre_score*)matrices[z] +(i*dna_len) +j)->value = log(0.0); // -inf
+ ((Pre_score*)matrices[z] +(i*dna_len) +j)->prev_i = 0;
+ ((Pre_score*)matrices[z] +(i*dna_len) +j)->prev_j = 0;
+ ((Pre_score*)matrices[z] +(i*dna_len) +j)->prev_matrix_no = 0;
+ ((Pre_score*)matrices[z] +(i*dna_len) +j)->issplice = 0;
+ }
+ }
+ }
+
+
+ /*********************************************************************************************/
+ // Element (0,0) for first/best matrice (z=0)
+ /*********************************************************************************************/
+ ((Pre_score*)matrices[0])->value = 0;
+ ((Pre_score*)matrices[0])->prev_i = 0;
+ ((Pre_score*)matrices[0])->prev_j = 0;
+ ((Pre_score*)matrices[0])->prev_matrix_no = 0;
+ ((Pre_score*)matrices[0])->issplice = 0;
+
+
+ /*********************************************************************************************/
+ // Line Zero for first/best matrice (z=0)
+ /*********************************************************************************************/
+ for (int i=1; i<est_len; i++) /* 0.line, all columns (DNA='-') */
+ {
+ ((Pre_score*)matrices[0] +(i*dna_len))->value = 0 ;
+ ((Pre_score*)matrices[0] +(i*dna_len))->prev_i = i-1;
+ ((Pre_score*)matrices[0] +(i*dna_len))->prev_j = 0;
+ ((Pre_score*)matrices[0] +(i*dna_len))->prev_matrix_no = 0;
+ ((Pre_score*)matrices[0] +(i*dna_len))->issplice = 0;
+ }
+
+
+ /*********************************************************************************************/
+ // Column Zero for first/best matrice (z=0)
+ /*********************************************************************************************/
+ for (int j=1; j<dna_len; j++) // 0.column, all lines
+ {
+ ((Pre_score*)matrices[0] +j)->value = 0 ;
+ ((Pre_score*)matrices[0] +j)->prev_i = 0 ;
+ ((Pre_score*)matrices[0] +j)->prev_j = j-1 ;
+ ((Pre_score*)matrices[0] +j)->prev_matrix_no = 0 ;
+ ((Pre_score*)matrices[0] +j)->issplice = 0;
+ }
+
+
+ /*********************************************************************************************/
+ // !!!! Column Zero and line Zero for all other matrices except the best one (z>0): -INF !!!!
+ /*********************************************************************************************/
+
+
+
+ /*********************************************************************************************/
+ // all other entries for all matrices, last column and row treated seperately
+ /*********************************************************************************************/
+
+ /* columnwise, 0.column and 0.line already filled */
+ for (int i=1; i<est_len; i++) /* i.column */
+ {
+
+ start_splice_pos = 0 ; //where to start looking in donor_sites
+ for (int zz=0; zz<nr_paths*nr_paths; zz++) {max_ss_element[zz]=0 ;} // initializing
+
+ for (int j=1; j<dna_len; j++) //j.line
+ {
+ dnaChar = check_char(dna[j-1]) ; // -1 because of of the gaps in the 0.line/column of the al-matrix
+ estChar = check_char(est[i-1]) ;
+
+
+ //if introns of length greater than max_len possible
+ if (j > max_len)
+ {
+ //storing highest ss_score (the position) and NOT -INF
+ int new_ss_pos = j-max_len ;
+ double donor_score = donor[new_ss_pos-1] ;
+ if (isfinite(donor_score)) //splice site!
+ {
+ if (max_ss_element[nr_paths-1] != 0)
+ {
+ for (int zz=0; zz<nr_paths; zz++) //for each alignment matrix
+ {
+ int pos_with_minscore = nr_paths ; //assuming, the new score is the worst score
+ double new_intron_score = donor_score+((Pre_score*)matrices[zz]+(i*dna_len)+new_ss_pos-1)->value ; //could be -INF
+ if (!(isfinite(new_intron_score))) {printf("new_intron_score is -INF\n") ;}
+ for (int pos=0; pos<nr_paths; pos++) //find worst score of all nr_paths+1 scores
+ {
+ int act_splice_position = max_ss_element[zz*nr_paths+pos] ;
+ assert(isfinite(donor[act_splice_position-1])) ;
+ double act_intron_score = donor[act_splice_position-1]+((Pre_score*)matrices[zz]+(i*dna_len)+act_splice_position-1)->value ;
+ if (new_intron_score >= act_intron_score)
+ {
+ new_intron_score = act_intron_score ;
+ pos_with_minscore = pos ;
+ } //else nothing happens, new intron score still the worst one
+ }
+ if (pos_with_minscore != nr_paths)
+ {
+ max_ss_element[zz*nr_paths+pos_with_minscore] = new_ss_pos ; //replacing
+ }
+ }
+ }
+ else
+ {
+ for (int pos=0; pos<nr_paths; pos++)
+ {
+ if (max_ss_element[pos]==0) //no previous splice site at this pos
+ {
+ for (int zz=0; zz<nr_paths; zz++) {max_ss_element[zz*nr_paths+pos]=j-max_len ;}
+ break ;
+ }
+ }
+ }
+ }
+ //start_pos in donor_sites so that only introns shorter then max_length are seen
+ if ((start_splice_pos < nr_donor_sites) && donor_sites[start_splice_pos] == j-max_len)
+ {
+ start_splice_pos++ ;
+ }
+ }
+
+
+ /* Computing all possible scores */
+ num_elem = 0 ; /* number of different scores/possibilities so far */
+ for (int z=0; z<nr_paths; z++) /* matrice z */
+ {
+ Pre_score* actMatrix = (Pre_score*)matrices[z]; /* hm, why? */
+
+ /***************************************/
+ // 0. Zero (local alignment version)
+ /***************************************/
+ assert(num_elem<max_num_elem) ;
+ array[num_elem].prev_i = 0;
+ array[num_elem].prev_j = 0;
+ array[num_elem].prev_matrix_no = z;
+ array[num_elem].isSplice = false; /* no splice site */
+ scores[num_elem] = 0 ;
+ num_elem++ ;
+
+
+ /***************************************/
+ // 1. gap on EST (j-1,i) -> (j,i)
+ /***************************************/
+ prevValue = ((Pre_score*)actMatrix +(i*dna_len)+j-1)->value ; /* (j-1,i) -> (j,i) */
+ if (isfinite(prevValue))
+ {
+ tempValue = prevValue +(matchmatrix[mlen* dnaChar]); /* score(gap,DNA) */
+ if (isfinite(tempValue))
+ {
+ assert(num_elem<max_num_elem) ;
+ array[num_elem].prev_i = i;
+ array[num_elem].prev_j = j-1; /* predecessor */
+ array[num_elem].prev_matrix_no = z;
+ array[num_elem].isSplice = false; /* no splice site */
+ scores[num_elem] = -tempValue ;
+ num_elem++ ;
+ }
+ }
+
+
+ /***************************************/
+ // 2. match/mismatch (j-1,i-1) -> (j,i)
+ /***************************************/
+ prevValue = ((Pre_score*)actMatrix +(i-1)*dna_len + j-1)->value ; /* (j-1,i-1) -> (j,i) */
+ if (isfinite(prevValue))
+ {
+ tempValue = prevValue +(matchmatrix[mlen* dnaChar +estChar]);//DNA mit EST
+ if (isfinite(tempValue))
+ {
+ assert(num_elem<max_num_elem) ;
+ array[num_elem].prev_i = i-1; /* predecessor */
+ array[num_elem].prev_j = j-1; /* predecessor */
+ array[num_elem].prev_matrix_no = z;
+ array[num_elem].isSplice = false; /* no splice site */
+ scores[num_elem] = -tempValue ;
+ num_elem++ ;
+ }
+ }
+
+
+ /***************************************/
+ // 3. gap on DNA (j,i-1) -> (j,i)
+ /***************************************/
+ prevValue = ((Pre_score*)actMatrix +(i-1)*dna_len + j)->value ;
+ if (isfinite(prevValue))
+ {
+ tempValue = prevValue +(matchmatrix[estChar]); /* score(EST,gap) */
+ if (isfinite(tempValue))
+ {
+ assert(num_elem<max_num_elem) ;
+ array[num_elem].prev_i = i-1; /* predecessor */
+ array[num_elem].prev_j = j;
+ array[num_elem].prev_matrix_no = z;
+ array[num_elem].isSplice = false; /* no splice site */
+ scores[num_elem]=-tempValue ;
+ num_elem++ ;
+ }
+
+ }
+
+
+ /***************************************/
+ // 4. all possible splice sites/introns
+ /***************************************/
+ // acceptor[j-1]: g of ag (-1 because cpp starts counting at 0)
+ // donor[splice_pos-1]: g of gt/gc (-1 because cpp starts counting at 0)
+ // donor_sites: the g's of the gt/gc
+
+ if (isfinite(acceptor[j-1]))
+ {
+
+ //the nr_paths many introns of length greater than max_len
+ for (int pos=0; pos<nr_paths; pos++)
+ {
+ if (max_ss_element[z*nr_paths+pos]>0)
+ {
+ assert((j-max_ss_element[z*nr_paths+pos]+1) > max_len) ;
+ splice_pos = max_ss_element[z*nr_paths+pos] ; //the g of the gt
+ prevValue = ((Pre_score*)actMatrix + (i*dna_len) +splice_pos-1)->value ; //one before the _gt
+ if (isfinite(prevValue) && (((Pre_score*)actMatrix + (i*dna_len) +splice_pos-1)->issplice!=1))
+ {
+ int dist = j -(splice_pos-1); //_gt...ag: dist from g to g
+ assert(dist>max_len) ;
+ tempValue = lookup_penalty(functions,dist, NULL, 0) +donor[splice_pos-1] +acceptor[j-1] +prevValue;
+ if (isfinite(tempValue))
+ {
+ assert(num_elem<max_num_elem) ;
+ array[num_elem].prev_i = i;
+ array[num_elem].prev_j = splice_pos-1; // predecessor
+ array[num_elem].prev_matrix_no = z;
+ array[num_elem].isSplice = true; // splice site
+ scores[num_elem]=-tempValue ;
+ num_elem++ ;
+ }
+ else if (isfinite(lookup_penalty(functions,dist, NULL, 0)))
+ {
+ printf("Something wrong with donor_sites(1000er) (intron_length: %d)\n", dist) ;
+ sleep(5000) ;
+ }
+ }
+ }
+ }
+
+
+
+ //all introns of length <= max_len
+ for (int k=start_splice_pos; k < nr_donor_sites; k++)
+ //for (int k=0; k < nr_donor_sites; k++) //also intron of length greater than 1000
+ {
+ splice_pos = donor_sites[k] ;
+ if (splice_pos > j-4) { break ;}
+ prevValue = ((Pre_score*)actMatrix + (i*dna_len) +splice_pos-1)->value ; //one before the _gt
+
+ if (isfinite(prevValue) && (((Pre_score*)actMatrix + (i*dna_len) +splice_pos-1)->issplice!=1))
+ {
+ int dist = j -(splice_pos-1); //_gt...ag: dist from g to g
+ assert(dist<=max_len) ;
+ tempValue = lookup_penalty(functions,dist, NULL, 0) +donor[splice_pos-1] +acceptor[j-1] +prevValue;
+ if (isfinite(tempValue))
+ {
+ assert(num_elem<max_num_elem) ;
+ array[num_elem].prev_i = i;
+ array[num_elem].prev_j = splice_pos-1; /* predecessor */
+ array[num_elem].prev_matrix_no = z;
+ array[num_elem].isSplice = true; /* splice site */
+ scores[num_elem]=-tempValue ;
+ num_elem++ ;
+ }
+ else if (isfinite(lookup_penalty(functions,dist, NULL, 0)))
+ {
+ printf("Something wrong with donor_sites (intron_length: %d)\n", dist) ;
+ sleep(5000) ;
+ }
+ }
+ }
+ }
+
+ } //end of z
+
+ CMath::nmin<struct ArrayElem>(scores, array, num_elem, nr_paths);
+
+ /* the nr_paths many highest scores are written in the nr_paths many matrices*/
+ for (int z=0; z<nr_paths; z++) {
+ if (z>=num_elem) {
+ //nothing happens, entry in matrices[z] at pos. (i,j) still -inf
+ }
+ else {
+ ((Pre_score*)matrices[z] + (i*dna_len) +j)->value = -scores[z] ;
+ ((Pre_score*)matrices[z] + (i*dna_len) +j)->prev_i = array[z].prev_i ;
+ ((Pre_score*)matrices[z] + (i*dna_len) +j)->prev_j = array[z].prev_j ;
+ ((Pre_score*)matrices[z] + (i*dna_len) +j)->issplice = array[z].isSplice ;
+ ((Pre_score*)matrices[z] + (i*dna_len) +j)->prev_matrix_no = array[z].prev_matrix_no;
+
+ if (-scores[z]>=max_scores[z]) { //storing the postion of the highest alignment score yet
+ max_scores[z] = -scores[z] ;
+ max_score_positions[2*z] = i; // pos i
+ max_score_positions[2*z +1] = j; // pos j
+ }
+ }
+ } //end of z
+
+ } //end of j
+ } // end of i
+
+
+
+ //free memory
+ delete[] array;
+ delete[] scores;
+
+ return;
+}
+
+
--- /dev/null
+#ifndef _FILLMATRIX_H__
+#define _FILLMATRIX_H__
+
+#include "Mathmatics.h"
+#include "penalty_info.h"
+#include <algorithm> //Eigentlich in Header
+#include <assert.h>
+#include <ctype.h>
+#include <iostream>
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+struct ArrayElem { //24B
+ double score;
+ int prev_j;
+ int prev_i;
+ int prev_matrix_no;
+ bool isSplice;
+} ;
+
+struct pre_score { //24B
+ double value; //8
+ int prev_i; //4
+ int prev_j; //4
+ int prev_matrix_no;//4
+ bool issplice;//4
+};
+
+typedef struct pre_score Pre_score;
+
+typedef struct Align_pair { //24B
+ double value ;
+ bool issplice;
+ int previ,prevj ;
+ char est_char;
+ char dna_char;
+};
+
+
+void fill_matrix (int nr_paths, Pre_score* matrices[], int len_est, int len_dna, char* est, char* dna, penalty_struct* functions, double* matchmatrix, double* donor, double* acceptor, bool remove_duplicate_scores, int nr_donor_sites, int* donor_sites, int* max_score_positions);
+// funktion aendern in void result_align(Pre_score* matrices[], int anzpath,
+
+int check_char(char base);
+
+bool result_align (Pre_score* matrices[], int matrixnr, int i, int j, int* result_length_ptr, char* est, char* dna, int* s_align, int* e_align, int* m_param, double* alignmentscores, int* max_score_positions );
+
+extern void print_align(Pre_score* matrix, int length_est, int length_dna, Align_pair* vektor, int result_length, int print_matrix);
+
+#endif // _FILLMATRIX_H__
--- /dev/null
+#include "io.h"
+#include "common.h"
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <ctype.h>
+
+FILE* CIO::target=stdout;
+
+CIO::CIO()
+{
+}
+
+void CIO::message(EMessageType prio, const CHAR *fmt, ... )
+{
+ check_target();
+ print_message_prio(prio, target);
+ va_list list;
+ va_start(list,fmt);
+ vfprintf(target,fmt,list);
+ va_end(list);
+ fflush(target);
+}
+
+void CIO::buffered_message(EMessageType prio, const CHAR *fmt, ... )
+{
+ check_target();
+ print_message_prio(prio, target);
+ va_list list;
+ va_start(list,fmt);
+ vfprintf(target,fmt,list);
+ va_end(list);
+}
+
+CHAR* CIO::skip_spaces(CHAR* str)
+{
+ INT i=0;
+
+ if (str)
+ {
+ for (i=0; isspace(str[i]); i++);
+
+ return &str[i];
+ }
+ else
+ return str;
+}
+
+void CIO::set_target(FILE* t)
+{
+ target=t;
+}
+
+void CIO::check_target()
+{
+ if (!target)
+ target=stdout;
+}
+
+void CIO::print_message_prio(EMessageType prio, FILE* target)
+{
+ switch (prio)
+ {
+ case M_DEBUG:
+ fprintf(target, "[DEBUG] ");
+ break;
+ case M_PROGRESS:
+ fprintf(target, "[PROGRESS] ");
+ break;
+ case M_INFO:
+ //fprintf(target, "[INFO] ");
+ break;
+ case M_NOTICE:
+ fprintf(target, "[NOTICE] ");
+ break;
+ case M_WARN:
+ fprintf(target, "[WARN] ");
+ break;
+ case M_ERROR:
+ fprintf(target, "[ERROR] ");
+ break;
+ case M_CRITICAL:
+ fprintf(target, "[CRITICAL] ");
+ break;
+ case M_ALERT:
+ fprintf(target, "[ALERT] ");
+ break;
+ case M_EMERGENCY:
+ fprintf(target, "[EMERGENCY] ");
+ break;
+ case M_MESSAGEONLY:
+ break;
+ default:
+ break;
+ }
+}
--- /dev/null
+#ifndef __CIO_H__
+#define __CIO_H__
+
+#include "common.h"
+
+#include <stdio.h>
+#include <stdarg.h>
+
+class CIO
+{
+public:
+ CIO();
+
+ static void set_target(FILE* target);
+ static void message(EMessageType prio, const CHAR *fmt, ... );
+
+ inline static void not_implemented()
+ {
+ message(M_ERROR, "Sorry, not yet implemented\n");
+ }
+
+ static void buffered_message(EMessageType prio, const CHAR *fmt, ... );
+
+ static CHAR* skip_spaces(CHAR* str);
+
+protected:
+ static void check_target();
+ static void print_message_prio(EMessageType prio, FILE* target);
+
+protected:
+ static FILE* target;
+};
+#endif
--- /dev/null
+#include <assert.h>
+#include "config.h"
+//#include "features/CharFeatures.h"
+//#include "features/StringFeatures.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include "io.h"
+
+
+#include "fill_matrix.h"
+#include "penalty_info.h"
+
+void init_penalty_struct(struct penalty_struct &PEN)
+{
+ PEN.limits=NULL ;
+ PEN.penalties=NULL ;
+ PEN.id=-1 ;
+ PEN.next_pen=NULL ;
+ PEN.transform = T_LINEAR ;
+ PEN.name = NULL ;
+ PEN.max_len=0 ;
+ PEN.min_len=0 ;
+ PEN.cache=NULL ;
+ PEN.use_svm=0 ;
+}
+
+void init_penalty_struct_cache(struct penalty_struct &PEN)
+{
+ if (PEN.cache || PEN.use_svm)
+ return ;
+
+ REAL* cache=new REAL[PEN.max_len+1] ;
+ if (cache)
+ {
+ for (INT i=0; i<=PEN.max_len; i++)
+ cache[i] = lookup_penalty(&PEN, i, 0, false) ;
+ PEN.cache = cache ;
+ }
+}
+
+void delete_penalty_struct(struct penalty_struct &PEN)
+{
+ if (PEN.id!=-1)
+ {
+ delete[] PEN.limits ;
+ delete[] PEN.penalties ;
+ delete[] PEN.name ;
+ delete[] PEN.cache ;
+ }
+}
+
+void delete_penalty_struct_array(struct penalty_struct *PEN, INT len)
+{
+ for (int i=0; i<len; i++)
+ delete_penalty_struct(PEN[i]) ;
+ delete[] PEN ;
+}
+
+
+//struct penalty_struct * read_penalty_struct_from_cell(const mxArray * mx_penalty_info, int &P)
+//{
+// P = mxGetN(mx_penalty_info) ;
+//
+// struct penalty_struct * PEN = new struct penalty_struct[P] ;
+// for (INT i=0; i<P; i++)
+// init_penalty_struct(PEN[i]) ;
+//
+// for (INT i=0; i<P; i++)
+// {
+// const mxArray* mx_elem = mxGetCell(mx_penalty_info, i) ;
+// if (mx_elem==NULL || !mxIsStruct(mx_elem))
+// {
+// CIO::message(M_ERROR, "empty cell element\n") ;
+// delete_penalty_struct_array(PEN,P) ;
+// return NULL ;
+// } ;
+// const mxArray* mx_id_field = mxGetField(mx_elem, 0, "id") ;
+// if (mx_id_field==NULL || !mxIsNumeric(mx_id_field) ||
+// mxGetN(mx_id_field)!=1 || mxGetM(mx_id_field)!=1)
+// {
+// CIO::message(M_ERROR, "missing id field\n") ;
+// delete_penalty_struct_array(PEN,P) ;
+// return NULL ;
+// }
+// const mxArray* mx_limits_field = mxGetField(mx_elem, 0, "limits") ;
+// if (mx_limits_field==NULL || !mxIsNumeric(mx_limits_field) ||
+// mxGetM(mx_limits_field)!=1)
+// {
+// CIO::message(M_ERROR, "missing limits field\n") ;
+// delete_penalty_struct_array(PEN,P) ;
+// return NULL ;
+// }
+// INT len = mxGetN(mx_limits_field) ;
+//
+// const mxArray* mx_penalties_field = mxGetField(mx_elem, 0, "penalties") ;
+// if (mx_penalties_field==NULL || !mxIsNumeric(mx_penalties_field) ||
+// mxGetM(mx_penalties_field)!=1 || mxGetN(mx_penalties_field)!=len)
+// {
+// CIO::message(M_ERROR, "missing penalties field\n") ;
+// delete_penalty_struct_array(PEN,P) ;
+// return NULL ;
+// }
+// const mxArray* mx_transform_field = mxGetField(mx_elem, 0, "transform") ;
+// if (mx_transform_field==NULL || !mxIsChar(mx_transform_field))
+// {
+// CIO::message(M_ERROR, "missing transform field\n") ;
+// delete_penalty_struct_array(PEN,P) ;
+// return NULL ;
+// }
+// const mxArray* mx_name_field = mxGetField(mx_elem, 0, "name") ;
+// if (mx_name_field==NULL || !mxIsChar(mx_name_field))
+// {
+// CIO::message(M_ERROR, "missing name field\n") ;
+// delete_penalty_struct_array(PEN,P) ;
+// return NULL ;
+// }
+// const mxArray* mx_max_len_field = mxGetField(mx_elem, 0, "max_len") ;
+// if (mx_max_len_field==NULL || !mxIsNumeric(mx_max_len_field) ||
+// mxGetM(mx_max_len_field)!=1 || mxGetN(mx_max_len_field)!=1)
+// {
+// CIO::message(M_ERROR, "missing max_len field\n") ;
+// delete_penalty_struct_array(PEN,P) ;
+// return NULL ;
+// }
+// const mxArray* mx_min_len_field = mxGetField(mx_elem, 0, "min_len") ;
+// if (mx_min_len_field==NULL || !mxIsNumeric(mx_min_len_field) ||
+// mxGetM(mx_min_len_field)!=1 || mxGetN(mx_min_len_field)!=1)
+// {
+// CIO::message(M_ERROR, "missing min_len field\n") ;
+// delete_penalty_struct_array(PEN,P) ;
+// return NULL ;
+// }
+// const mxArray* mx_use_svm_field = mxGetField(mx_elem, 0, "use_svm") ;
+// if (mx_use_svm_field==NULL || !mxIsNumeric(mx_use_svm_field) ||
+// mxGetM(mx_use_svm_field)!=1 || mxGetN(mx_use_svm_field)!=1)
+// {
+// CIO::message(M_ERROR, "missing use_svm field\n") ;
+// delete_penalty_struct_array(PEN,P) ;
+// return NULL ;
+// }
+// INT use_svm = (INT) mxGetScalar(mx_use_svm_field) ;
+// //fprintf(stderr, "use_svm_field=%i\n", use_svm) ;
+//
+// const mxArray* mx_next_id_field = mxGetField(mx_elem, 0, "next_id") ;
+// if (mx_next_id_field==NULL || !mxIsNumeric(mx_next_id_field) ||
+// mxGetM(mx_next_id_field)!=1 || mxGetN(mx_next_id_field)!=1)
+// {
+// CIO::message(M_ERROR, "missing next_id field\n") ;
+// delete_penalty_struct_array(PEN,P) ;
+// return NULL ;
+// }
+// INT next_id = (INT) mxGetScalar(mx_next_id_field)-1 ;
+//
+// INT id = (INT) mxGetScalar(mx_id_field)-1 ;
+// if (i<0 || i>P-1)
+// {
+// CIO::message(M_ERROR, "id out of range\n") ;
+// delete_penalty_struct_array(PEN,P) ;
+// return NULL ;
+// }
+// INT max_len = (INT) mxGetScalar(mx_max_len_field) ;
+// if (max_len<0 || max_len>1024*1024*100)
+// {
+// CIO::message(M_ERROR, "max_len out of range\n") ;
+// delete_penalty_struct_array(PEN,P) ;
+// return NULL ;
+// }
+// PEN[id].max_len = max_len ;
+//
+// INT min_len = (INT) mxGetScalar(mx_min_len_field) ;
+// if (min_len<0 || min_len>1024*1024*100)
+// {
+// CIO::message(M_ERROR, "min_len out of range\n") ;
+// delete_penalty_struct_array(PEN,P) ;
+// return NULL ;
+// }
+// PEN[id].min_len = min_len ;
+//
+// if (PEN[id].id!=-1)
+// {
+// CIO::message(M_ERROR, "penalty id already used\n") ;
+// delete_penalty_struct_array(PEN,P) ;
+// return NULL ;
+// }
+// PEN[id].id=id ;
+// if (next_id>=0)
+// PEN[id].next_pen=&PEN[next_id] ;
+// //fprintf(stderr,"id=%i, next_id=%i\n", id, next_id) ;
+//
+// assert(next_id!=id) ;
+// PEN[id].use_svm=use_svm ;
+// PEN[id].limits = new REAL[len] ;
+// PEN[id].penalties = new REAL[len] ;
+// double * limits = mxGetPr(mx_limits_field) ;
+// double * penalties = mxGetPr(mx_penalties_field) ;
+//
+// for (INT i=0; i<len; i++)
+// {
+// PEN[id].limits[i]=limits[i] ;
+// PEN[id].penalties[i]=penalties[i] ;
+// }
+// PEN[id].len = len ;
+//
+// char *transform_str = mxArrayToString(mx_transform_field) ;
+// char *name_str = mxArrayToString(mx_name_field) ;
+//
+// if (strcmp(transform_str, "log")==0)
+// PEN[id].transform = T_LOG ;
+// else if (strcmp(transform_str, "log(+1)")==0)
+// PEN[id].transform = T_LOG_PLUS1 ;
+// else if (strcmp(transform_str, "log(+3)")==0)
+// PEN[id].transform = T_LOG_PLUS3 ;
+// else if (strcmp(transform_str, "(+3)")==0)
+// PEN[id].transform = T_LINEAR_PLUS3 ;
+// else if (strcmp(transform_str, "")==0)
+// PEN[id].transform = T_LINEAR ;
+// else
+// {
+// delete_penalty_struct_array(PEN,P) ;
+// mxFree(transform_str) ;
+// return NULL ;
+// }
+// PEN[id].name = new char[strlen(name_str)+1] ;
+// strcpy(PEN[id].name, name_str) ;
+//
+// init_penalty_struct_cache(PEN[id]) ;
+//
+// mxFree(transform_str) ;
+// mxFree(name_str) ;
+// }
+// return PEN ;
+//}
+
+REAL lookup_penalty_svm(const struct penalty_struct *PEN, INT p_value, REAL *d_values)
+{
+ if (PEN==NULL)
+ return 0 ;
+ assert(PEN->use_svm>0) ;
+ REAL d_value=d_values[PEN->use_svm-1] ;
+ //fprintf(stderr,"transform=%i, d_value=%1.2f\n", (INT)PEN->transform, d_value) ;
+
+ switch (PEN->transform)
+ {
+ case T_LINEAR:
+ break ;
+ case T_LOG:
+ d_value = log(d_value) ;
+ break ;
+ case T_LOG_PLUS1:
+ d_value = log(d_value+1) ;
+ break ;
+ case T_LOG_PLUS3:
+ d_value = log(d_value+3) ;
+ break ;
+ case T_LINEAR_PLUS3:
+ d_value = d_value+3 ;
+ break ;
+ default:
+ CIO::message(M_ERROR, "unknown transform\n") ;
+ break ;
+ }
+
+ INT idx = 0 ;
+ REAL ret ;
+ for (INT i=0; i<PEN->len; i++)
+ if (PEN->limits[i]<=d_value)
+ idx++ ;
+
+ if (idx==0)
+ ret=PEN->penalties[0] ;
+ else if (idx==PEN->len)
+ ret=PEN->penalties[PEN->len-1] ;
+ else
+ {
+ ret = (PEN->penalties[idx]*(d_value-PEN->limits[idx-1]) + PEN->penalties[idx-1]*
+ (PEN->limits[idx]-d_value)) / (PEN->limits[idx]-PEN->limits[idx-1]) ;
+ }
+
+ //fprintf(stderr,"ret=%1.2f\n", ret) ;
+
+ if (PEN->next_pen)
+ ret+=lookup_penalty(PEN->next_pen, p_value, d_values);
+
+ //fprintf(stderr,"ret=%1.2f\n", ret) ;
+
+ return ret ;
+}
+
+REAL lookup_penalty(const struct penalty_struct *PEN, INT p_value,
+ REAL* svm_values, bool follow_next)
+{
+ if (PEN==NULL)
+ return 0 ;
+ if (PEN->use_svm)
+ return lookup_penalty_svm(PEN, p_value, svm_values) ;
+
+ if ((p_value<PEN->min_len) || (p_value>PEN->max_len))
+ return -CMath::INFTY ;
+
+ if (PEN->cache!=NULL && (p_value>=0) && (p_value<=PEN->max_len))
+ {
+ REAL ret=PEN->cache[p_value] ;
+ if (PEN->next_pen && follow_next)
+ ret+=lookup_penalty(PEN->next_pen, p_value, svm_values);
+ return ret ;
+ }
+
+ REAL d_value = (REAL) p_value ;
+ switch (PEN->transform)
+ {
+ case T_LINEAR:
+ break ;
+ case T_LOG:
+ d_value = log(d_value) ;
+ break ;
+ case T_LOG_PLUS1:
+ d_value = log(d_value+1) ;
+ break ;
+ case T_LOG_PLUS3:
+ d_value = log(d_value+3) ;
+ break ;
+ case T_LINEAR_PLUS3:
+ d_value = d_value+3 ;
+ break ;
+ default:
+ CIO::message(M_ERROR, "unknown transform\n") ;
+ break ;
+ }
+
+ INT idx = 0 ;
+ REAL ret ;
+ for (INT i=0; i<PEN->len; i++)
+ if (PEN->limits[i]<=d_value)
+ idx++ ;
+
+ if (idx==0)
+ ret=PEN->penalties[0] ;
+ else if (idx==PEN->len)
+ ret=PEN->penalties[PEN->len-1] ;
+ else
+ {
+ ret = (PEN->penalties[idx]*(d_value-PEN->limits[idx-1]) + PEN->penalties[idx-1]*
+ (PEN->limits[idx]-d_value)) / (PEN->limits[idx]-PEN->limits[idx-1]) ;
+ }
+ //if (p_value>=30 && p_value<150)
+ //fprintf(stderr, "%s %i(%i) -> %1.2f\n", PEN->name, p_value, idx, ret) ;
+
+ if (PEN->next_pen && follow_next)
+ ret+=lookup_penalty(PEN->next_pen, p_value, svm_values);
+
+ return ret ;
+}
--- /dev/null
+#ifndef __PENALTY_INFO_H__
+#define __PENALTY_INFO_H__
+
+#include "common.h"
+#include "Mathmatics.h"
+
+enum ETransformType
+{
+ T_LINEAR,
+ T_LOG,
+ T_LOG_PLUS1,
+ T_LOG_PLUS3,
+ T_LINEAR_PLUS3
+} ;
+
+struct penalty_struct
+{
+ INT len ;
+ REAL *limits ;
+ REAL *penalties ;
+ INT max_len ;
+ INT min_len ;
+ REAL *cache ;
+ enum ETransformType transform ;
+ INT id ;
+ struct penalty_struct *next_pen ;
+ char * name ;
+ INT use_svm ;
+} ;
+
+void init_penalty_struct(struct penalty_struct &PEN) ;
+void delete_penalty_struct(struct penalty_struct &PEN) ;
+void delete_penalty_struct_array(struct penalty_struct *PEN, INT len) ;
+
+//#ifdef HAVE_MATLAB
+//struct penalty_struct * read_penalty_struct_from_cell(const mxArray * mx_penalty_info, mwSize &P) ;
+//#endif
+
+REAL lookup_penalty(const struct penalty_struct *PEN, INT p_value,
+ REAL* svm_values, bool follow_next=true) ;
+
+#endif
--- /dev/null
+#include "fill_matrix.h"
+using namespace std;
+
+void print_align(Pre_score* matrix, int length_est, int length_dna,Align_pair* vektor, int result_length, int print_matrix)
+
+{
+ //Druck der matrix
+ switch(print_matrix)
+ {
+ case 1:
+ {
+ cout <<"Matrix: \n";
+
+ for (int k = 0; k<length_est; k++)//k Zeile, l Spalte
+ {
+ cout << "Zeile " << k << ":\t";
+ for (int l= 0; l<length_dna; l++)
+ {
+ cout << ((Pre_score*)matrix + (k*length_dna) + l)->value <<"\t";
+ }
+ cout <<"\n";
+ }
+ break;
+ }
+
+ case 2:
+ {
+ cout <<"Matrix: \n";
+
+ for (int k = 0; k<length_est; k++)//k Zeile, l Spalte
+ {
+ cout << "Zeile " << k << ":\t";
+ for (int l= 0; l<length_dna; l++)
+ {
+ if (((((Pre_score*)matrix + (k*length_dna) + l)->prev_i) == k-1) && ((((Pre_score*)matrix + (k*length_dna) + l)->prev_j) == l-1))
+ cout << "\\" <<((Pre_score*)matrix + (k*length_dna) + l)->value <<"\t";
+ else if (((((Pre_score*)matrix + (k*length_dna) + l)->prev_i) == k) && ((((Pre_score*)matrix + (k*length_dna) + l)->prev_j) == l-1))
+ cout << "<"<< ((Pre_score*)matrix + (k*length_dna) + l)->value <<"\t";
+ else if (((((Pre_score*)matrix + (k*length_dna) + l)->prev_i) == k -1) && ((((Pre_score*)matrix + (k*length_dna) + l)->prev_j) == l))
+ cout << "|"<< ((Pre_score*)matrix + (k*length_dna) + l)->value <<"\t";
+ else { cout << ((Pre_score*)matrix + (k*length_dna) + l)->value << "(";
+ cout << ((Pre_score*)matrix + (k*length_dna) + l)->prev_i << "," << ((Pre_score*)matrix + (k*length_dna) + l)->prev_j << ")\t";
+ }
+
+ }
+ cout <<"\n";
+ }
+
+ cout << "length_est" << length_est << "\n";
+ cout << "length_dna" << length_dna << "\n";
+ cout << "result_length: " << result_length << "\n";
+
+
+ break;
+ }
+
+ case 3:
+ {
+ //Druck des Alignment
+
+ // cout << "Alignment\n" ;
+ int j = 0;
+ // cout << "resultlength: " << result_length <<"\n";
+ while (j<result_length) {
+ int pacman = j;
+ cout << "D:(" <<j<<")\t";
+ for (int i=0; ((j<result_length) && (i<50)); i++)
+ {
+ cout << vektor[j].dna_char << " ";
+ j++;
+ }
+
+ cout << "\n";
+
+ cout << "E:(" <<pacman<<")\t";
+ for (int i=0;((pacman<result_length) && (i<50)); i++)
+ {
+ cout << vektor[pacman].est_char << " ";
+ pacman++;
+ }
+ cout << "\n\n";
+ }
+
+ //Ende Druck Alignment
+ break ;
+ }
+
+ default:
+ {
+ cout <<"";
+ }
+ }
+
+
+
+}
+
--- /dev/null
+#include "qpalma_dp.h"
+
+/*[splice_align, est_align, weightMatch, alignmentscores, dnaest] = ...
+ myalign([nr_paths], dna, est, {h}, matchmatrix, donor, acceptor, remove_duplicate_scores, ...
+ print_matrix) */
+
+/* nlhs - Number of expected mxArrays (Left Hand Side)
+ plhs - Array of pointers to expected outputs
+ nrhs - Number of inputs (Right Hand Side)
+ prhs - Array of pointers to input data. */
+
+ myalign([nr_paths], dna, est, {h}, matchmatrix, donor, acceptor, remove_duplicate_scores, ...
+ print_matrix) */
+
+void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
+{
+
+
+
+ /***************************************************************************/
+ // right number of arguments?
+ /***************************************************************************/
+ if(nrhs != 9)
+ {
+ mexPrintf("10 input arguments please: \n [splice_align, est_align, weightMatch, alignmentscores, dnaest] = myalign(nr_paths, dna, est, cellarray, matchmatrix, donor, acceptor, remove_duplicate_scores, print_matrix)\n");
+ return ;
+ }
+
+ if(nlhs != 5)
+ {
+ mexPrintf("5 output arguments please\n [splice_align, est_align, weightMatch, alignmentscores, dnaest] = myalign(nr_paths, dna, est, cellarray, matchmatrix, donor, acceptor, remove_duplicate_scores, print_matrix)\n");
+ return ;
+ }
+
+
+ /***************************************************************************/
+ // variables
+ /***************************************************************************/
+ const int mlen = 6; // score matrix: length of 6 for "- A C G T N"
+ int arg = 0 ; // counter variable
+ int failure ; //
+
+
+ /***************************************************************************/
+ // 0. nr_paths
+ /***************************************************************************/
+ int nr_paths = (int)*mxGetPr(prhs[arg]); // 0. input (double)
+ arg++; // arg -> 1
+
+
+ /***************************************************************************/
+ // 1. dna
+ /***************************************************************************/
+ int dna_len = mxGetN(prhs[arg]) + 1; // mxGetN: number of columns (int)
+ char* dna = (char*)mxCalloc(dna_len, sizeof(char));
+
+ failure = mxGetString(prhs[arg], dna, dna_len); // NULL at end of string
+ if (failure) { // mxGetString returns 0 if success, 1 on failure
+ mexPrintf("couldn't open dna");
+ return;
+ }
+ arg++; // arg -> 2
+
+
+ /***************************************************************************/
+ // 2. est
+ /***************************************************************************/
+ int est_len = mxGetN(prhs[arg]) + 1;
+ char* est = (char*)mxCalloc(est_len, sizeof(char));
+
+ failure = mxGetString(prhs[arg], est, est_len);
+ if (failure) {
+ mexPrintf("couldn't open est");
+ return;
+ }
+ arg++; // arg -> 3
+
+
+ /***************************************************************************/
+ // 3. h (cell array)
+ /***************************************************************************/
+ penalty_struct* functions;
+ mwSize anz_func = 1;
+
+ functions = read_penalty_struct_from_cell(prhs[arg], anz_func);
+ //std::cout << " bla " << functions->max_len << " \n";
+ //std::cout << "lookup_penalty(5) = " << lookup_penalty(functions, 5, NULL, 0)<<std::endl ;
+ arg++; // arg -> 4
+
+
+ /***************************************************************************/
+ // 4. match matrix (columnwise)
+ /***************************************************************************/
+ int nr_entries = mxGetN(prhs[arg]) * mxGetM(prhs[arg]);
+ //mexPrintf("\nNumber of entries in matchmatrix: %d\n", nr_entries) ;
+ double *matchmatrix;
+ matchmatrix = mxGetPr(prhs[arg]);
+ //mexPrintf("matchmatrix\n") ;
+ //for (int i=0; i< nr_entries; i++) {
+ // mexPrintf("entry %d: <%f>\n", i, matchmatrix[i]);
+ //}
+ arg++; // arg -> 5
+
+
+ /***************************************************************************/
+ // 5. donor
+ /***************************************************************************/
+ int donor_length = mxGetN(prhs[arg]);
+ double *donor;
+ donor = mxGetPr(prhs[arg]);
+ //mexPrintf("Donor eingelesen: <%f>\n", donor[0]);
+ arg++; // arg -> 6
+
+
+ /***************************************************************************/
+ // 6. acceptor
+ /***************************************************************************/
+ int acceptor_length = mxGetN(prhs[arg]);
+ double *acceptor;
+ acceptor = mxGetPr(prhs[arg]);
+ //mexPrintf("Acceptor eingelesen: <%f>\n", acceptor[0]);
+ arg++; // arg -> 7
+
+
+ /***************************************************************************/
+ // 7.remove_duplicate_scores
+ /***************************************************************************/
+ bool remove_duplicate_scores = (bool)*mxGetPr(prhs[arg]);
+ arg++; // arg -> 8
+
+
+ /***************************************************************************/
+ // 8. print_matrix
+ /***************************************************************************/
+ int print_matrix = (int)*mxGetPr(prhs[arg]);
+ arg++; // arg -> 9
+
+
+ /***************************************************************************/
+ // initialize alignment matrices and call fill_matrix()
+ /***************************************************************************/
+
+ //possible donor positions
+ int nr_donor_sites = 0 ;
+ for (int ii=0; ii<donor_length; ii++) {
+ if(isfinite(donor[ii])) {
+ nr_donor_sites++ ;
+ }
+ }
+ //mwSize donor_sites[nr_donor_sites] ;
+ mwSize* donor_sites = (mwSize*)mxCalloc(nr_donor_sites,sizeof(mwSize));
+ int donor_idx = 0 ;
+ for (int ii=0; ii<donor_length; ii++) {
+ if(isfinite(donor[ii])) {
+ donor_sites[donor_idx] = ii+1 ;
+ donor_idx++ ;
+ }
+ }
+
+
+ //int max_score_positions[nr_paths*2]; //local alignment: where is the highest alignment score [i1,j1,i2,j2...]
+ mwSize* max_score_positions = (mwSize*)mxCalloc(nr_paths*2,sizeof(mwSize));
+
+ //Pre_score* matrices[nr_paths];
+ Pre_score** matrices = (Pre_score**)mxCalloc(nr_paths,sizeof(Pre_score*));
+ for (int z=0; z<nr_paths; z++) {
+ //matrices[z] = new Pre_score[dna_len * est_len];
+ //mexPrintf("size of one matrice: %d\n", est_len*dna_len*sizeof(Pre_score)) ;
+ matrices[z] = (Pre_score*)mxCalloc(dna_len*est_len,sizeof(Pre_score));
+ }
+
+ fill_matrix(nr_paths, matrices, est_len, dna_len, est, dna, functions, matchmatrix, donor, acceptor, remove_duplicate_scores, nr_donor_sites, donor_sites, max_score_positions);
+
+ //just some info
+ //for (int z=0; z<nr_paths; z++) {
+ // mexPrintf("(%d, %d)\n", max_score_positions[2*z], max_score_positions[2*z +1]) ;
+ //}
+
+ /***************************************************************************/
+ // return arguments etc.
+ /***************************************************************************/
+ int result_length; //Eine Variable fuer alle Matrizen
+
+ //int splice_align[(dna_len-1)*nr_paths]; //SpliceAlign for DNA - 1 line per matrix
+ //int est_align[(est_len-1)*nr_paths]; //SpliceAlign for EST - 1 line per matrix
+ //int mmatrix_param[(mlen*mlen)*nr_paths];//Matrix fuer Scorematrixparameter - 1 Zeile pro Matrix
+ int* splice_align = (int*)mxCalloc((dna_len-1)*nr_paths,sizeof(int));
+ int* est_align = (int*)mxCalloc((est_len-1)*nr_paths,sizeof(int));
+ int* mmatrix_param = (int*)mxCalloc((mlen*mlen)*nr_paths,sizeof(int));
+ double alignmentscores[nr_paths]; //alignment score for each path/matrix
+
+
+ memset((char*)splice_align, -1, (dna_len-1)*nr_paths*sizeof(int)); // fills splice_align with zeros
+ memset((char*)est_align, -1, (est_len-1)*nr_paths*sizeof(int)); // fills est_align with zeros
+ memset((char*)mmatrix_param, 0, mlen*mlen*nr_paths*sizeof(int)); //fills mmatrix_param with zeros
+ memset(alignmentscores, -1, nr_paths*sizeof(double)); //fills alignmentscores with zeros
+
+ // dnaest
+ plhs[nlhs-1] = mxCreateCellMatrix(2, nr_paths);
+
+
+
+ //mexPrintf("DEBUG: decoding %d paths\n",nr_paths);
+ for (int z=0; z<nr_paths; z++) {
+ result_length = 0 ;
+
+ int* s_align = splice_align + (dna_len-1)*z; //pointer
+ int* e_align = est_align + (est_len-1)*z ; //pointer
+ int* mparam = mmatrix_param + (mlen*mlen)*z; //pointer
+
+ bool no_more_path = result_align(matrices, z, est_len, dna_len, &result_length, est, dna, s_align, e_align, mparam, alignmentscores, max_score_positions);
+
+ // dnaest
+ mxArray* mxdna = mxCreateDoubleMatrix(result_length, 1, mxREAL);
+ mxArray* mxest = mxCreateDoubleMatrix(result_length, 1, mxREAL);
+ double *DNA = mxGetPr(mxdna) ;
+ double *EST = mxGetPr(mxest) ;
+ mxSetCell(plhs[nlhs-1], z*2, mxdna) ;
+ mxSetCell(plhs[nlhs-1], z*2+1, mxest) ;
+
+ //backtracking
+ mwSize i = max_score_positions[2*z] ; //i (est)
+ mwSize j = max_score_positions[2*z +1] ; //j (dna)
+ mwSize prev_i = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_i;
+ mwSize prev_j = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_j;
+ mwSize prev_m_no = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_matrix_no;
+
+ for (int ii=result_length; ii>0; ii--) {
+ if ((prev_i == (i-1)) && (prev_j == (j-1))) { // match or mismatch
+ DNA[ii-1] = check_char(dna[j-1]) ;
+ EST[ii-1] = check_char(est[i-1]) ;
+ }
+ else if ((prev_i == (i)) && (prev_j == (j-1))) {// gap on EST
+ DNA[ii-1] = check_char(dna[j-1]) ;
+ EST[ii-1] = 0 ; //gap
+ }
+ else if ((prev_i == (i-1)) && (prev_j == (j))) { // gap on DNA
+ DNA[ii-1] = 0 ; //gap
+ EST[ii-1] = check_char(est[i-1]) ;
+ }
+ else {// splice site
+ for (j; j > prev_j; j--) {
+ DNA[ii-1] = check_char(dna[j-1]) ;
+ EST[ii-1] = 6 ; //intron
+ ii-- ;
+ }
+ ii++ ; // last ii-- too much (because done in for-loop)
+ }
+
+ i = prev_i;
+ j = prev_j;
+ prev_i = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_i;
+ prev_j = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_j;
+ prev_m_no = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_matrix_no;
+ }
+
+ } //end of z
+
+
+
+ //mexPrintf("DEBUG:Returning arguments\n");
+ int index = 0 ;
+
+ //splice_align (mxArray):
+ const mwSize dims0[] = {(dna_len-1), nr_paths};
+ //const int dims0[] = {(dna_len-1), nr_paths};
+ plhs[index] = mxCreateNumericArray(2, dims0, mxINT32_CLASS, mxREAL);
+ mwSize* start0 = (mwSize*)(mxGetData(plhs[index]));
+ memcpy(start0, splice_align, mxGetM(plhs[index])*mxGetN(plhs[index])*mxGetElementSize(plhs[index]));
+ index++ ; // ->1
+
+ //est_align (mxArray):
+ const mwSize dims1[] = {(est_len-1), nr_paths};
+ //const int dims1[] = {(est_len-1), nr_paths};
+ plhs[index] = mxCreateNumericArray(2, dims1, mxINT32_CLASS, mxREAL);
+ mwSize* start1 = (mwSize*)(mxGetData(plhs[index]));
+ memcpy(start1, est_align, mxGetM(plhs[index])*mxGetN(plhs[index])*mxGetElementSize(plhs[index]));
+ index++ ; // ->2
+
+ //weightmatch (mxArray)
+ const mwSize dims2[] = {mlen*mlen, nr_paths};
+ //const int dims2[] = {mlen*mlen, nr_paths};
+ plhs[index] = mxCreateNumericArray(2, dims2, mxINT32_CLASS, mxREAL);
+ mwSize* start2= (mwSize*)(mxGetData(plhs[index]));
+ memcpy(start2, mmatrix_param, mxGetM(plhs[index])*mxGetN(plhs[index])*mxGetElementSize(plhs[index]));
+ index++ ; // ->3
+
+ //alignmentscore
+ plhs[index] = mxCreateDoubleMatrix(1,nr_paths,mxREAL);
+ if (plhs[index] == NULL)
+ {
+ printf("%s : Out of memory on line %d\n", __FILE__, __LINE__);
+ printf("Unable to create mxArray.\n");
+ exit(1);
+ }
+ double* start3 = (double*)(mxGetData(plhs[index]));
+ memcpy(start3, alignmentscores, mxGetM(plhs[index])*mxGetN(plhs[index])*mxGetElementSize(plhs[index]));
+ index++ ; // ->4
+
+ assert(nlhs==index+1) ;
+
+ //Free Memory
+ //mexPrintf("DEBUG:Free Memory\n");
+ mxFree(donor_sites);
+ mxFree(max_score_positions);
+ for (int i=nr_paths-1;i>=0; i--)
+ {
+ //delete[] matrices[i];
+ mxFree(matrices[i]);
+ }
+ mxFree(matrices);
+ mxFree(splice_align);
+ mxFree(est_align);
+ mxFree(mmatrix_param);
+ mxFree(dna);
+ mxFree(est);
+
+ return;
+
+}
+
--- /dev/null
+#ifndef _QPALMA_DP_H_
+#define _QPALMA_DP_H_
+
+#include "penalty_info.h"
+
+struct ArrayElem { //24B
+ double score;
+ int prev_j;
+ int prev_i;
+ int prev_matrix_no;
+ bool isSplice;
+} ;
+
+struct pre_score { //24B
+ double value; //8
+ int prev_i; //4
+ int prev_j; //4
+ int prev_matrix_no;//4
+ bool issplice;//4
+};
+
+typedef struct pre_score Pre_score;
+
+typedef struct Align_pair { //24B
+ double value ;
+ bool issplice;
+ int previ,prevj ;
+ char est_char;
+ char dna_char;
+};
+
+
+void fill_matrix (int nr_paths, Pre_score* matrices[], int len_est, int len_dna, char* est, char* dna, penalty_struct* functions, double* matchmatrix, double* donor, double* acceptor, bool remove_duplicate_scores, int nr_donor_sites, int* donor_sites, int* max_score_positions);
+
+int check_char(char base);
+
+bool result_align (Pre_score* matrices[], int matrixnr, int i, int j, int* result_length_ptr, char* est, char* dna, int* s_align, int* e_align, int* m_param, double* alignmentscores, int* max_score_positions );
+
+extern void print_align(Pre_score* matrix, int length_est, int length_dna, Align_pair* vektor, int result_length, int print_matrix);
+
+
+/*
+ * The function myalign calculates a new alignment given the scoring scheme
+ * Its input arguments are:
+ *
+ * num_path(id) -> an integer specifying which alignment has to be done
+ * dna ->
+ * est ->
+ * h ->
+ * mmatrix ->
+ * donor ->
+ * acceptor ->
+ * remove_duplicate_scores -> a boolean
+ * print_matrix -> a boolean
+ *
+ * [SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest] = myalign_local(...
+ */
+
+class Alignment {
+
+ private:
+ SpliceAlign
+ EstAlign;
+ WeightMatch;
+ TotalScores;
+
+ public:
+ Alignment();
+ ~Alignment();
+
+ void myalign(int nr_paths, dna, est, h, matchmatrix, double* donor, double* acceptor, bool remove_duplicate_scores, bool print_matrix);
+ getSpliceAlign()
+ getEstAlign();
+ getWeightMatch();
+ getTotalScores();
+ getDNAEST();
+
+
+};
+
+
+#endif // _QPALMA_DP_H_
+
--- /dev/null
+#include "assert.h"
+#include "fill_matrix.h"
+
+using namespace std;
+
+bool result_align(Pre_score* matrices[], int z, int est_len, int dna_len, int* result_length_ptr, char* est, char* dna, int* s_align, int* e_align, int* mparam, double* alignmentscores, int* max_score_positions)
+//Gibt 1 zurueck, wenn kein weiterer Pfad existiert, sonst 0
+
+{
+ const int mlen=6; // length of matchmatrix
+ int startend[4] ; // [i_start, j_start, i_end, j_end] ;
+ startend[3] = max_score_positions[2*z] ; //i (est)
+ startend[4] = max_score_positions[2*z +1] ; //j (dna)
+ //printf("in result_align: z=%d:(%d, %d)\n", z, startend[3], startend[4]) ;
+ int dnanum ; //using check_char: letter -> number
+ int estnum ; //using check_char: letter -> number
+
+ int dna_pos ;
+ int est_pos ;
+ int splice_state ;
+ int est_state ;
+ for (dna_pos=dna_len-1; dna_pos>startend[4]; dna_pos--) {
+ s_align[dna_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
+ }
+ for (est_pos=est_len-1; est_pos>startend[3]; est_pos--) {
+ e_align[est_pos-1] = 4 ;
+ }
+ //splice_align: 0: exon, 1: donor, 2: acceptor, 3: intro,. 4: dangling end
+
+
+ /* starting point for backtracking */
+ int i = startend[3] ;
+ int j = startend[4] ;
+ int act_m_no = z ;
+ alignmentscores[z] = ((Pre_score*)matrices[z] + i*dna_len +j)->value;
+ //printf("alignment_score for z=%d: %3.8f\n", z, alignmentscores[z]) ;
+
+ int prev_i = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_i;
+ int prev_j = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_j;
+ int prev_m_no = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_matrix_no;
+
+ if(!(isfinite(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value)))
+ {
+ cout << "Kein weiterer Pfad vorhanden\n";
+ return 1;
+ }
+
+ if(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value==0) {
+ cout << "Kein weiterer Pfad vorhanden\n";
+ return 1;
+ }
+
+ assert(dna_pos == j) ;
+ assert(est_pos == i) ;
+ splice_state = 0 ; //exon
+ est_state = 1 ; //[112222111 ... for exons of length 2,4,3 ...
+
+ //compute length of alignment (between max(m,n) and m+n)
+ //local_alignment: backtracking to first Zero: look at actual value
+ while(!(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value)==0.0) {
+
+ //just some info
+ //if ((z>0) && (!(z==prev_m_no))) {
+ // printf("(%d,%d): next step in next matrix\n", i,j) ;
+ //}
+
+ if ((prev_i == (i-1)) && (prev_j == (j-1))) { // match or mismatch
+ (*result_length_ptr)= (*result_length_ptr) + 1;
+
+ s_align[dna_pos-1] = splice_state;
+ dna_pos-- ;
+ e_align[est_pos-1] = est_state ; //1 or 2, depended
+ est_pos-- ;
+
+ dnanum = check_char(dna[j-1]) ; //-1 because index starts with 0
+ estnum = check_char(est[i-1]) ; //-1 because index starts with 0
+ mparam[mlen*dnanum +estnum] ++ ;
+ }
+ else if ((prev_i == (i)) && (prev_j == (j-1))) {// gap on EST
+ (*result_length_ptr)= (*result_length_ptr) + 1;
+
+ s_align[dna_pos-1] = splice_state;
+ dna_pos-- ;
+
+ dnanum = check_char(dna[j-1]) ; //-1 because index starts with 0
+ estnum = 0 ; //gap
+ mparam[mlen*dnanum +estnum] ++ ;
+ }
+ else if ((prev_i == (i-1)) && (prev_j == (j))) { // gap on DNA
+ (*result_length_ptr)= (*result_length_ptr) + 1;
+
+ e_align[est_pos-1] = est_state ; //1 or 2, depended
+ est_pos-- ;
+
+ dnanum = 0 ; //gap
+ estnum = check_char(est[i-1]) ; //-1 because index starts with 0
+ mparam[mlen*dnanum +estnum] ++ ;
+ }
+ else {// splice site
+ (*result_length_ptr) = (*result_length_ptr) + (j - prev_j);
+ if (((Pre_score*)matrices[act_m_no] + i*dna_len + j)->issplice==0) {
+ printf("matrices[act_m_no](i,j): %3.15f\n",((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value) ;
+ printf("matrices[prev_m_no](i,j): %3.15f\n",((Pre_score*)matrices[prev_m_no] + i*dna_len +j)->value) ;
+ printf("issplice(act): %d\n",((Pre_score*)matrices[act_m_no] + i*dna_len + j)->issplice) ;
+ printf("issplice(prev): %d\n",((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->issplice) ;
+ printf("act_m_no: %d, prev_m_no: %d\n", act_m_no, prev_m_no) ;
+ printf("j: %d -> prev_j:%d\n", j, prev_j) ;
+ printf("i: %d -> prev_i:%d\n", i, prev_i) ;
+ }
+ if (est_state==1) { //was exon labeled "1"
+ est_state = 2 ; //new exon is labeled "2"
+ }
+ else {
+ est_state = 1 ; //last exon was labeled "2", new exon is labeled "1"
+ }
+ for (j; j > prev_j; j--) {
+ if (splice_state == 0) { //coming from exon
+ splice_state = 2 ; //first intron_state: acceptor
+ } else {
+ splice_state = 3 ; //intron
+ }
+ if (j-1 == prev_j) { //last intron_state: donor
+ splice_state = 1 ;//donor
+ }
+ s_align[dna_pos-1] = splice_state;
+ dna_pos-- ;
+ }
+ splice_state = 0 ; //exon again
+
+ }
+
+ startend[1] = i;
+ startend[2] = j;
+ i = prev_i;
+ j = prev_j;
+ act_m_no = prev_m_no ;
+
+ prev_i = ((Pre_score*)matrices[act_m_no] + i*dna_len + j)->prev_i;
+ prev_j = ((Pre_score*)matrices[act_m_no] + i*dna_len + j)->prev_j;
+ prev_m_no = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_matrix_no;
+ }
+ for (dna_pos; dna_pos>0; dna_pos--) {
+ s_align[dna_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
+ }
+ for (est_pos; est_pos>0; est_pos--) {
+ e_align[est_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
+ }
+
+ //printf("(%d, %d) --> (%d, %d)\n", startend[1], startend[2], startend[3], startend[4]) ;
+ //printf("result_length: %d\n", *result_length_ptr) ;
+
+
+ return 0;
+}
+
+
#!/usr/bin/env python
# -*- coding: utf-8 -*-
+from numpy import inf
from numpy.matlib import zeros
def calculateWeights(plf, scores):
y_i and y_i+1 parameters, so the fractions are saved in the weight vectors!
"""
- currentWeight = zeros((1,30))
+
+ currentWeight = zeros((30,1))
for k in range(len(scores)):
value = scores[k]
- Lower = sum(d.limits <= value)
- Upper = Lower +1 ; # x-werte bleiben fest
+
+ Lower = 0
+ Lower = len([elem for elem in plf.limits if elem <= value])
+ Upper = Lower+1 ; # x-werte bleiben fest
if Lower == 0:
- currentWeight[1] = currentWeight[1] + 1;
- elif:
- Lower == length[plf.limits]
- currentWeight[30] = currentWeight[30] + 1;
+ currentWeight[0] = currentWeight[0] + 1;
+ elif Lower == len(plf.limits):
+ currentWeight[-1] = currentWeight[-1] + 1;
else:
- weightup = (value - plf.limits(Lower)) / (plf.limits(Upper) - plf.limits(Lower)) ;
- weightlow = (plf.limits(Upper) - value) / (plf.limits(Upper) - plf.limits(Lower)) ;
- currentWeight(Upper) = currentWeight(Upper) + weightup;
- currentWeight(Lower) = currentWeight(Lower) + weightlow;
+ weightup = (value - plf.limits[Lower]) / (plf.limits[Upper] - plf.limits[Lower]) ;
+ weightlow = (plf.limits[Upper] - value) / (plf.limits[Upper] - plf.limits[Lower]) ;
+ currentWeight[Upper] = currentWeight[Upper] + weightup;
+ currentWeight[Lower] = currentWeight[Lower] + weightlow;
return currentWeight
def computeSpliceWeights(d, a, h, SpliceAlign, don_supp, acc_supp):
- assert(isempty(d.transform))
- assert(isempty(a.transform))
- assert(isempty(h.transform))
+ #assert(isempty(d.transform))
+ #assert(isempty(a.transform))
+ #assert(isempty(h.transform))
####################################################################################
# 1. Donor: In don_supp stehen Werte der SVM., in SpliceAlign die Einsen
####################################################################################
- # Lege Gewichtsvektor fuer 30 supporting points an:
- weightDon = zeros((1,30))
-
# Picke die Positionen raus, an denen eine Donorstelle ist
- DonorScores = don_supp(find(SpliceAlign==1)) ;
- assert(all(DonorScores~=-Inf)) ;
+ #DonorScores = don_supp(find(SpliceAlign==1)) ;
+ #assert(all(DonorScores~=-Inf)) ;
+
+ DonorScores = [elem for pos,elem in enumerate(don_supp) if SpliceAlign[pos] == 1]
+ assert not ( -inf in DonorScores )
weightDon = calculateWeights(d,DonorScores)
# 2. Acceptor: In acc_supp stehen Werte der SVM., in SpliceAlign die Einsen
####################################################################################
- AcceptorScores = acc_supp(find(SpliceAlign == 2)+1) ;
- assert(all(AcceptorScores~=-Inf)) ;
+ #AcceptorScores = acc_supp(find(SpliceAlign == 2)+1) ;
+ #assert(all(AcceptorScores~=-Inf)) ;
#Den Vektor Acceptorstellen durchgehen und die Gewichtsvektoren belasten:
- weightAcc = calculateWeights(a,AcceptorScores)
+ AcceptorScores = [elem for pos,elem in enumerate(acc_supp) if SpliceAlign[pos] == 2]
+ #assert not ( -inf in AcceptorScores )
+ weightAcc = calculateWeights(a,AcceptorScores)
####################################################################################
# 3. Intron length: SpliceAlign: Gaps zaehlen und auf Gapgewichte addieren
####################################################################################
- intron_starts = find(SpliceAlign==1) ;
- intron_ends = find(SpliceAlign==2) ;
+ #intron_starts = find(SpliceAlign==1) ;
+ #intron_ends = find(SpliceAlign==2) ;
- %assert: introns do not overlap
- assert(length(intron_starts) == length(intron_ends)) ;
- assert(all(intron_starts < intron_ends)) ;
- assert(all(intron_ends(1:end-1) < intron_starts(2:end))) ;
+ #%assert: introns do not overlap
+ #assert(length(intron_starts) == length(intron_ends)) ;
+ #assert(all(intron_starts < intron_ends)) ;
+ #assert(all(intron_ends(1:end-1) < intron_starts(2:end))) ;
- weightIntron = calculateWeights(h,AcceptorScores)
+ #weightIntron = calculateWeights(h,AcceptorScores)
+ weightIntron = [1,2,3]
return weightDon, weightAcc, weightIntron
from set_param_palma import *
from computeSpliceAlign import *
+from computeSpliceWeights import *
+
"""
A training method for the QPalma project
don_supp = Donors[exampleId]
acc_supp = Acceptors[exampleId]
-
# Berechne die Parameter des wirklichen Alignments (but with untrained d,a,h ...)
- true_SpliceAlign, true_weightMatch = computeSpliceAlign(dna, exons)
+ trueSpliceAlign, trueWeightMatch = computeSpliceAlign(dna, exons)
+ trueWeightDon, trueWeightAcc, trueWeightIntron = computeSpliceWeights(d, a, h, trueSpliceAlign, don_supp, acc_supp)
pdb.set_trace()
- true_weightDon, true_weightAcc, true_weightIntron = computeSpliceWeights(d, a, h, true_SpliceAlign, don_supp, acc_supp)
-
-
+ #
+ # compute wrong alignments
+ #
+
+ # Compute donor, acceptor with penalty_lookup_new
+ # returns two double lists
+ [donor, acceptor] = compute_donacc(don_supp, acc_supp, d, a) ;
+
+ #myalign wants the acceptor site on the g of the ag
+ acceptor = [acceptor(2:end) -Inf] ;
+
+ # call myalign
+ [SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest] = ...
+ myalign_local(num_path(id), dna, est, {h}, mmatrix, donor, acceptor, ...
+ remove_duplicate_scores, print_matrix);
+
+ SpliceAlign = double(SpliceAlign') ; %column
+ weightMatch = double(weightMatch') ;
+
+
iteration_nr += 1
break
for id = 1:N
%fprintf('%i\n', id) ;
if (mod(id,50)==0), fprintf(1,'.'); end
-
- %some assertions concerning the splicesites
- idx_don = find(don_supp~=-Inf) ;
- assert(all(dna(idx_don)=='g')) ;
- assert(all(dna(idx_don(idx_don<length(dna))+1)=='t' | dna(idx_don(idx_don<length(dna))+1)=='c')) ;
- idx_acc = find(acc_supp~=-Inf) ;
- assert(all(dna(idx_acc(idx_acc>3)-2)=='a')) ;
- assert(all(dna(idx_acc(idx_acc>2)-1)=='g')) ;
-
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% True Alignment and Comparison with wrong ones
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
end ;
end
fprintf('\n') ;
-
-
-
+ #################################################################################
const_added = solver.addConstraint(deltas, loss, pos, marginRescaling)
objValue,w,self.slacks = solver.solve()
# maxgap=max(gap)
[mean(xis>=1) mean(gap>=1) mean(xis>=1e-3) mean(gap>=1e-3)]
- [h,d,a,mmatrix] = set_param_palma(param,ARGS.train_with_intronlengthinformation) ;
-
- save(paramfilename, ...
- 'param', 'h', 'd', 'a', 'mmatrix', 'A', 'b') ;
-
if max(gap)<=column_eps:
break
-
assert(all(maxgap<=column_eps))
assert(length(take_idx)==N)
run()
"""
-
def __init__(self):
numAcceptorParams = 0
numDonorParams = 0