+ added c code from the palma project
authorfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Fri, 30 Nov 2007 15:53:57 +0000 (15:53 +0000)
committerfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Fri, 30 Nov 2007 15:53:57 +0000 (15:53 +0000)
+ added own version of the swig interface

TODO

- think about wrapper for structs / cells
  user either SWIG constrcuts or native python object

git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@6902 e1793c9e-67f9-0310-80fc-b846ff1f7b36

19 files changed:
QPalmaDP/Makefile [new file with mode: 0644]
QPalmaDP/Mathmatics.cpp [new file with mode: 0644]
QPalmaDP/Mathmatics.h [new file with mode: 0644]
QPalmaDP/QPalmaDP.i [new file with mode: 0644]
QPalmaDP/common.h [new file with mode: 0644]
QPalmaDP/config.h [new file with mode: 0644]
QPalmaDP/fill_matrix.cpp [new file with mode: 0644]
QPalmaDP/fill_matrix.h [new file with mode: 0644]
QPalmaDP/io.cpp [new file with mode: 0644]
QPalmaDP/io.h [new file with mode: 0644]
QPalmaDP/penalty_info.cpp [new file with mode: 0644]
QPalmaDP/penalty_info.h [new file with mode: 0644]
QPalmaDP/print_align.cpp [new file with mode: 0644]
QPalmaDP/qpalma_dp.cpp [new file with mode: 0644]
QPalmaDP/qpalma_dp.h [new file with mode: 0644]
QPalmaDP/result_align.cpp [new file with mode: 0644]
python/.qpalma.py.swp
python/computeSpliceWeights.py
python/qpalma.py

diff --git a/QPalmaDP/Makefile b/QPalmaDP/Makefile
new file mode 100644 (file)
index 0000000..42574ea
--- /dev/null
@@ -0,0 +1,24 @@
+SRCS= Mathmatics.cpp\
+               fill_matrix.cpp\
+               result_align.cpp\
+               penalty_info.cpp\
+               print_align.cpp\
+               io.cpp
+
+OBJS = $(SRCS:%.cpp=%.o)
+
+#CXXFLAGS=-O3 -fPIC
+#CXXFLAGS=-O3 -fPIC -pg -fprofile-arcs
+CXXFLAGS=-O3 -fPIC -ggdb
+
+IF=QPalmaDP
+
+all: $(OBJS)
+       swig -c++ -python ${IF}.i
+       g++ $(CXXFLAGS) -I/usr/include/python2.5 -c ${IF}_wrap.cxx -o ${IF}_wrap.o
+       g++ $(CXXFLAGS) -shared -lpython2.5 $(OBJS) ${IF}_wrap.o -o _${IF}.so
+       python -c "import ${IF}"
+
+clean:
+       rm *.o *.so *.cxx ${IF}.py ${IF}.pyc
+
diff --git a/QPalmaDP/Mathmatics.cpp b/QPalmaDP/Mathmatics.cpp
new file mode 100644 (file)
index 0000000..61298da
--- /dev/null
@@ -0,0 +1,349 @@
+// Math.cpp: implementation of the CMath class.
+//
+//////////////////////////////////////////////////////////////////////
+
+
+#include "Mathmatics.h"
+#include "io.h"
+
+#include <sys/time.h>
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <time.h>
+#include <unistd.h>
+#include <assert.h>
+
+//////////////////////////////////////////////////////////////////////
+// Construction/Destruction
+//////////////////////////////////////////////////////////////////////
+
+#ifdef USE_LOGCACHE
+//gene/math specific constants
+#ifdef USE_HMMDEBUG
+#define MAX_LOG_TABLE_SIZE 10*1024*1024
+#define LOG_TABLE_PRECISION 1e-6
+#else
+#define MAX_LOG_TABLE_SIZE 123*1024*1024
+#define LOG_TABLE_PRECISION 1e-15
+#endif
+
+INT CMath::LOGACCURACY         = 0; // 100000 steps per integer
+#endif
+
+INT CMath::LOGRANGE            = 0; // range for logtable: log(1+exp(x))  -25 <= x <= 0
+
+#ifdef USE_PATHDEBUG
+const REAL CMath::INFTY            =  1e11;            // infinity
+#else
+const REAL CMath::INFTY            =  -log(0.0);       // infinity
+#endif
+const REAL CMath::ALMOST_NEG_INFTY =  -1000;   
+
+CHAR CMath::rand_state[256];
+
+CMath::CMath()
+{
+       struct timeval tv;
+       gettimeofday(&tv, NULL);
+       UINT seed=(UINT) (4223517*getpid()*tv.tv_sec*tv.tv_usec);
+       initstate(seed, CMath::rand_state, sizeof(CMath::rand_state));
+       CIO::message(M_INFO, "seeding random number generator with %u\n", seed);
+
+#ifdef USE_LOGCACHE
+    LOGRANGE=CMath::determine_logrange();
+    LOGACCURACY=CMath::determine_logaccuracy(LOGRANGE);
+    CIO::message(M_INFO, "Initializing log-table (size=%i*%i*%i=%2.1fMB) ...",LOGRANGE,LOGACCURACY,sizeof(REAL),LOGRANGE*LOGACCURACY*sizeof(REAL)/(1024.0*1024.0)) ;
+   
+    CMath::logtable=new REAL[LOGRANGE*LOGACCURACY];
+    init_log_table();
+    CIO::message(M_INFO, "Done.\n") ;
+#else
+       INT i=0;
+       while ((REAL)log(1+((REAL)exp(-REAL(i)))))
+               i++;
+    CIO::message(M_INFO, "determined range for x in log(1+exp(-x)) is:%d\n", i);
+       LOGRANGE=i;
+#endif 
+}
+
+CMath::~CMath()
+{
+#ifdef USE_LOGCACHE
+       delete[] logtable;
+#endif
+}
+
+#ifdef USE_LOGCACHE
+INT CMath::determine_logrange()
+{
+    INT i;
+    REAL acc=0;
+    for (i=0; i<50; i++)
+    {
+       acc=((REAL)log(1+((REAL)exp(-REAL(i)))));
+       if (acc<=(REAL)LOG_TABLE_PRECISION)
+           break;
+    }
+
+    CIO::message(M_INFO, "determined range for x in table log(1+exp(-x)) is:%d (error:%G)\n",i,acc);
+    return i;
+}
+
+INT CMath::determine_logaccuracy(INT range)
+{
+    range=MAX_LOG_TABLE_SIZE/range/((int)sizeof(REAL));
+    CIO::message(M_INFO, "determined accuracy for x in table log(1+exp(-x)) is:%d (error:%G)\n",range,1.0/(double) range);
+    return range;
+}
+
+//init log table of form log(1+exp(x))
+void CMath::init_log_table()
+{
+  for (INT i=0; i< LOGACCURACY*LOGRANGE; i++)
+    logtable[i]=log(1+exp(REAL(-i)/REAL(LOGACCURACY)));
+}
+#endif
+
+void CMath::sort(INT *a, INT cols, INT sort_col)
+{
+  INT changed=1;
+  if (a[0]==-1) return ;
+  while (changed)
+  {
+      changed=0; INT i=0 ;
+      while ((a[(i+1)*cols]!=-1) && (a[(i+1)*cols+1]!=-1)) // to be sure
+         {
+                 if (a[i*cols+sort_col]>a[(i+1)*cols+sort_col])
+                 {
+                         for (INT j=0; j<cols; j++)
+                                 CMath::swap(a[i*cols+j],a[(i+1)*cols+j]) ;
+                         changed=1 ;
+                 } ;
+                 i++ ;
+         } ;
+  } ;
+} 
+
+void CMath::sort(REAL *a, INT* idx, INT N) 
+{
+
+       INT changed=1;
+       while (changed)
+       {
+               changed=0;
+               for (INT i=0; i<N-1; i++)
+               {
+                       if (a[i]>a[i+1])
+                       {
+                               swap(a[i],a[i+1]) ;
+                               swap(idx[i],idx[i+1]) ;
+                               changed=1 ;
+                       } ;
+               } ;
+       } ;
+        
+} 
+
+
+
+//plot x- axis false positives (fp) 1-Specificity
+//plot y- axis true positives (tp) Sensitivity
+INT CMath::calcroc(REAL* fp, REAL* tp, REAL* output, INT* label, INT& size, INT& possize, INT& negsize, REAL& tresh, FILE* rocfile)
+{
+       INT left=0;
+       INT right=size-1;
+       INT i;
+
+       for (i=0; i<size; i++)
+       {
+               if (!(label[i]== -1 || label[i]==1))
+                       return -1;
+       }
+
+       //sort data such that -1 labels first +1 behind
+       while (left<right)
+       {
+               while ((label[left] < 0) && (left<right))
+                       left++;
+               while ((label[right] > 0) && (left<right))      //warning: label must be either -1 or +1
+                       right--;
+
+               swap(output[left],output[right]);
+               swap(label[left], label[right]);
+       }
+
+       // neg/pos sizes
+       negsize=left;
+       possize=size-left;
+       REAL* negout=output;
+       REAL* posout=&output[left];
+
+       // sort the pos and neg outputs separately
+       qsort(negout, negsize);
+       qsort(posout, possize);
+
+       // set minimum+maximum values for decision-treshhold
+       REAL minimum=min(negout[0], posout[0]);
+       REAL maximum=minimum;
+       if (negsize>0)
+               maximum=max(maximum,negout[negsize-1]);
+       if (possize>0)
+               maximum=max(maximum,posout[possize-1]);
+
+       REAL treshhold=minimum;
+       REAL old_treshhold=treshhold;
+
+       //clear array.
+       for (i=0; i<size; i++)
+       {
+               fp[i]=1.0;
+               tp[i]=1.0;
+       }
+
+       //start with fp=1.0 tp=1.0 which is posidx=0, negidx=0
+       //everything right of {pos,neg}idx is considered to beLONG to +1
+       INT posidx=0;
+       INT negidx=0;
+       INT iteration=1;
+       INT returnidx=-1;
+
+       REAL minerr=10;
+
+       while (iteration < size && treshhold<=maximum)
+       {
+               old_treshhold=treshhold;
+
+               while (treshhold==old_treshhold && treshhold<=maximum)
+               {
+                       if (posidx<possize && negidx<negsize)
+                       {
+                               if (posout[posidx]<negout[negidx])
+                               {
+                                       if (posout[posidx]==treshhold)
+                                               posidx++;
+                                       else
+                                               treshhold=posout[posidx];
+                               }
+                               else
+                               {
+                                       if (negout[negidx]==treshhold)
+                                               negidx++;
+                                       else
+                                               treshhold=negout[negidx];
+                               }
+                       }
+                       else
+                       {
+                               if (posidx>=possize && negidx<negsize-1)
+                               {
+                                       negidx++;
+                                       treshhold=negout[negidx];
+                               }
+                               else if (negidx>=negsize && posidx<possize-1)
+                               {
+                                       posidx++;
+                                       treshhold=posout[posidx];
+                               }
+                               else if (negidx<negsize && treshhold!=negout[negidx])
+                                       treshhold=negout[negidx];
+                               else if (posidx<possize && treshhold!=posout[posidx])
+                                       treshhold=posout[posidx];
+                               else
+                               {
+                                       treshhold=2*(maximum+100); // force termination
+                                       posidx=possize;
+                                       negidx=negsize;
+                                       break;
+                               }
+                       }
+               }
+
+               //calc tp,fp
+               tp[iteration]=(possize-posidx)/(REAL (possize));
+               fp[iteration]=(negsize-negidx)/(REAL (negsize));
+
+               //choose poINT with minimal error
+               if (minerr > negsize*fp[iteration]/size+(1-tp[iteration])*possize/size )
+               {
+                       minerr=negsize*fp[iteration]/size+(1-tp[iteration])*possize/size;
+                       tresh=(old_treshhold+treshhold)/2;
+                       returnidx=iteration;
+               }
+
+               iteration++;
+       }
+
+       // set new size
+       size=iteration;
+
+       if (rocfile)
+       {
+               const CHAR id[]="ROC";
+               fwrite(id, sizeof(char), sizeof(id), rocfile);
+               fwrite(fp, sizeof(REAL), size, rocfile);
+               fwrite(tp, sizeof(REAL), size, rocfile);
+       }
+
+       return returnidx;
+}
+
+UINT CMath::crc32(BYTE *data, INT len)
+{
+    UINT        result;
+    INT                 i,j;
+    BYTE       octet;
+
+    result = 0-1;
+
+    for (i=0; i<len; i++)
+    {
+       octet = *(data++);
+       for (j=0; j<8; j++)
+       {
+           if ((octet >> 7) ^ (result >> 31))
+           {
+               result = (result << 1) ^ 0x04c11db7;
+           }
+           else
+           {
+               result = (result << 1);
+           }
+           octet <<= 1;
+       }
+    }
+
+    return ~result; 
+}
+
+double CMath::mutual_info(REAL* p1, REAL* p2, INT len)
+{
+       double e=0;
+
+       for (INT i=0; i<len; i++)
+               for (INT j=0; j<len; j++)
+                       e+=exp(p2[j*len+i])*(p2[j*len+i]-p1[i]-p1[j]);
+
+       return e;
+}
+
+double CMath::relative_entropy(REAL* p, REAL* q, INT len)
+{
+       double e=0;
+
+       for (INT i=0; i<len; i++)
+               e+=exp(p[i])*(p[i]-q[i]);
+
+       return e;
+}
+
+double CMath::entropy(REAL* p, INT len)
+{
+       double e=0;
+
+       for (INT i=0; i<len; i++)
+               e-=exp(p[i])*p[i];
+
+       return e;
+}
diff --git a/QPalmaDP/Mathmatics.h b/QPalmaDP/Mathmatics.h
new file mode 100644 (file)
index 0000000..2e24d48
--- /dev/null
@@ -0,0 +1,546 @@
+#ifndef __MATHMATICS_H_
+#define __MATHMATICS_H_
+
+#include "common.h"
+#include "io.h"
+
+#include <math.h>
+#include <stdio.h>
+
+//define finite for win32
+#ifdef _WIN32
+#include <float.h>
+#define finite _finite
+#define isnan _isnan
+#endif
+
+#ifndef NAN
+#include <stdlib.h>
+#define NAN (strtod("NAN",NULL))
+#endif
+
+#ifdef SUNOS
+extern "C" int finite(double);
+#endif
+
+/** Mathematical Functions.
+ * Class which collects generic mathematical functions
+ */
+class CMath  
+{
+public:
+       /**@name Constructor/Destructor.
+        */
+       //@{
+       ///Constructor - initializes log-table
+       CMath();
+
+       ///Destructor - frees logtable
+       virtual ~CMath();
+       //@}
+
+       /**@name min/max/abs functions.
+       */
+       //@{
+
+       ///return the minimum of two integers
+       //
+       template <class T>
+       static inline T min(T a, T b)
+       {
+               return ((a)<=(b))?(a):(b);
+       }
+
+       ///return the maximum of two integers
+       template <class T>
+       static inline T max(T a, T b) 
+       {
+               return ((a)>=(b))?(a):(b);
+       }
+
+       ///return the maximum of two integers
+       template <class T>
+       static inline T abs(T a)
+       {
+               return ((a)>=(0))?(a):(-a);
+       }
+       //@}
+
+       /**@name misc functions */
+       //@{
+
+       /// crc32
+       static UINT crc32(BYTE *data, INT len);
+
+       static inline REAL round(REAL d)
+       {
+               return floor(d+0.5);
+       }
+
+       /// signum of type T variable a 
+       template <class T>
+       static inline REAL sign(REAL a)
+       {
+               if (a==0)
+                       return 0;
+               else return (a<0) ? (-1) : (+1);
+       }
+
+       /// swap floats a and b
+       template <class T>
+       static inline void swap(T & a,T &b)
+       {
+               T c=a ;
+               a=b; b=c ;
+       }
+
+       /// x^2
+       template <class T>
+       static inline T sq(T x)
+       {
+               return x*x;
+       }
+
+       static inline LONG factorial(INT n)
+       {
+               LONG res=1 ;
+               for (int i=2; i<=n; i++)
+                       res*=i ;
+               return res ;
+       }
+
+       static inline LONG nchoosek(INT n, INT k)
+       {
+               long res=1 ;
+
+               for (INT i=n-k+1; i<=n; i++)
+                       res*=i ;
+
+               return res/factorial(k) ;
+       }
+
+       /** performs a bubblesort on a given matrix a.
+        * it is sorted from in ascending order from top to bottom
+        * and left to right */
+       static void sort(INT *a, INT cols, INT sort_col=0) ;
+       static void sort(REAL *a, INT*idx, INT N) ;
+       
+       /** performs a quicksort on an array output of length size
+        * it is sorted from in ascending (for type T) */
+       template <class T>
+       static void qsort(T* output, INT size) ;
+
+       /** performs a quicksort on an array output of length size
+        * it is sorted from in ascending order 
+        * (for type T1) and returns the index (type T2)
+        * matlab alike [sorted,index]=sort(output) 
+        */
+       template <class T1,class T2>
+       static void qsort(T1* output, T2* index, INT size);
+
+       /** performs a quicksort on an array output of length size
+        * it is sorted from in ascending order
+        * (for type T1) and returns the index (type T2)
+        * matlab alike [sorted,index]=sort(output) 
+        */
+       template <class T1,class T2>
+       static void qsort_backward(T1* output, T2* index, INT size);
+
+     /* finds the smallest element in output and puts that element as the 
+               first element  */
+       template <class T>
+       static void min(REAL* output, T* index, INT size);
+
+     /* finds the n smallest elements in output and puts these elements as the 
+               first n elements  */
+       template <class T>
+       static void nmin(REAL* output, T* index, INT size, INT n);
+
+       /* performs a inplace unique of a vector of type T using quicksort 
+        * returns the new number of elements */
+       template <class T>
+       static INT unique(T* output, INT size) 
+       {
+               qsort(output, size) ;
+               INT i,j=0 ;
+               for (i=0; i<size; i++)
+                       if (i==0 || output[i]!=output[i-1])
+                               output[j++]=output[i] ;
+               return j ;
+       }
+
+       /* finds an element in a sorted array via binary search
+     * returns -1 if not found */
+       static inline INT fast_find(WORD* output, INT size, WORD elem)
+       {
+               INT start=0, end=size-1, middle=size/2 ;
+
+               if (output[start]>elem || output[end]<elem)
+                       return -1 ;
+
+               while (1)
+               {
+                       if (output[middle]>elem)
+                       {
+                               end = middle ;
+                               middle=start+(end-start)/2 ;
+                       } ;
+                       if (output[middle]<elem)
+                       {
+                               start = middle ;
+                               middle=start+(end-start)/2 ;
+                       }
+                       if (output[middle]==elem)
+                               return middle ;
+                       if (end-start<=1)
+                       {
+                               if (output[start]==elem)
+                                       return start ;
+                               if (output[end]==elem)
+                                       return end ;
+                               return -1 ;
+                       }
+               }
+       }
+
+       /* finds an element in a sorted array via binary search
+        * that is smaller-equal elem und the next element is larger-equal  */
+       static inline INT fast_find_range(REAL* output, INT size, REAL elem)
+       {
+               INT start=0, end=size-2, middle=(size-2)/2 ;
+
+               if (output[start]>elem)
+                       return -1 ;
+               if (output[end]<=elem)
+                       return size-1 ;
+
+               while (1)
+               {
+                       if (output[middle]>elem)
+                       {
+                               end = middle ;
+                               middle=start+(end-start)/2 ;
+                       } ;
+                       if (output[middle]<elem)
+                       {
+                               start = middle ;
+                               middle=start+(end-start)/2 ;
+                       }
+                       if (output[middle]<=elem && output[middle+1]>=elem)
+                               return middle ;
+                       if (end-start<=1)
+                       {
+                               if (output[start]<=elem && output[start+1]>=elem)
+                                       return start ;
+                               return end ;
+                       }
+               }
+       }
+
+       /** calculates ROC into (fp,tp)
+        * from output and label of length size 
+        * returns index with smallest error=fp+fn
+        */
+       static INT calcroc(REAL* fp, REAL* tp, REAL* output, INT* label, INT& size, INT& possize, INT& negsize, REAL& tresh, FILE* rocfile);
+       //@}
+
+       /// returns the mutual information of p which is given in logspace
+       /// where p,q are given in logspace
+       static double mutual_info(REAL* p1, REAL* p2, INT len);
+
+       /// returns the relative entropy H(P||Q), 
+       /// where p,q are given in logspace
+       static double relative_entropy(REAL* p, REAL* q, INT len);
+
+       /// returns entropy of p which is given in logspace
+       static double entropy(REAL* p, INT len);
+
+       /**@name summing functions */
+       //@{ 
+       /** sum logarithmic probabilities.
+        * Probability measures are summed up but are now given in logspace where
+        * direct summation of exp(operand) is not possible due to numerical problems, i.e. eg. exp(-1000)=0. Therefore
+        * we do log( exp(a) + exp(b)) = a + log (1 + exp (b-a)) where a is max(p,q) and b min(p,q). */
+#ifdef USE_LOGCACHE
+       static inline REAL logarithmic_sum(REAL p, REAL q)
+       {
+               if (finite(p))
+               {
+                       if (finite(q))
+                       {
+
+                               register REAL diff=p-q;
+
+                               if (diff>0)             //p>q
+                               {
+                                       if (diff > LOGRANGE)
+                                               return p;
+                                       else
+                                               return  p + logtable[(int)(diff*LOGACCURACY)];
+                               }
+                               else                    //p<=q
+                               {
+                                       if (-diff > LOGRANGE)
+                                               return  q;
+                                       else
+                                               return  q + logtable[(int)(-diff*LOGACCURACY)];
+                               }
+                       }
+                       CIO::message("INVALID second operand to logsum(%f,%f) expect undefined results\n", p, q);
+                       return NAN;
+               }
+               else 
+                       return q;
+       }
+
+       ///init log table of form log(1+exp(x))
+       static void init_log_table();
+       
+       /// determine INT x for that log(1+exp(-x)) == 0
+       static INT determine_logrange();
+
+       /// determine accuracy, such that the thing fits into MAX_LOG_TABLE_SIZE, needs logrange as argument
+       static INT determine_logaccuracy(INT range);
+#else
+       /*
+       inline REAL logarithmic_sum(REAL p, REAL q)
+       {
+           double result=comp_logarithmic_sum(p,q);
+
+           printf("diff:%f <-> %f\n",p-q, result);
+           return result;
+       }*/
+
+       static inline REAL logarithmic_sum(REAL p, REAL q)
+       {
+               if (finite(p))
+               {
+                       if (finite(q))
+                       {
+
+                               register REAL diff=p-q;
+                               if (diff>0)             //p>q
+                               {
+                                       if (diff > LOGRANGE)
+                                               return p;
+                                       else
+                                               return  p + log(1+exp(-diff));
+                               }
+                               else                    //p<=q
+                               {
+                                       if (-diff > LOGRANGE)
+                                               return  q;
+                                       else
+                                               return  q + log(1+exp(diff));
+                               }
+                       }
+                       return p;
+               }
+               else 
+                       return q;
+       }
+#endif
+#ifdef LOG_SUM_ARRAY
+       /** sum up a whole array of values in logspace.
+        * This function addresses the numeric instabiliy caused by simply summing up N elements by adding 
+        * each of the elements to some variable. Instead array neighbours are summed up until one element remains.
+        * Whilst the number of additions remains the same, the error is only in the order of log(N) instead N.
+        */
+       static inline REAL logarithmic_sum_array(REAL *p, INT len)
+         {
+           if (len<=2)
+             {
+               if (len==2)
+                 return logarithmic_sum(p[0],p[1]) ;
+               if (len==1)
+                 return p[0];
+               return -INFTY ;
+             }
+           else
+             {
+               register REAL *pp=p ;
+               if (len%2==1) pp++ ;
+               for (register INT j=0; j < len>>1; j++)
+                 pp[j]=logarithmic_sum(pp[j<<1], pp[1+(j<<1)]) ;
+             }
+           return logarithmic_sum_array(p,len%2+len>>1) ;
+         } 
+#endif
+       //@}
+public:
+       /**@name constants*/
+       //@{
+       /// infinity
+       static const REAL INFTY;
+       
+       /// almost neg (log) infinity
+       static const REAL ALMOST_NEG_INFTY;
+
+       /// range for logtable: log(1+exp(x))  -LOGRANGE <= x <= 0
+       static INT LOGRANGE;
+       
+#ifdef USE_LOGCACHE    
+       /// number of steps per integer
+       static INT LOGACCURACY;
+       //@}
+protected:
+       ///table with log-values
+       static REAL* logtable;  
+#endif
+       static CHAR rand_state[256];
+};
+
+
+//implementations of template functions
+template <class T>
+void CMath::qsort(T* output, INT size)
+{
+       if (size==2)
+       {
+               if (output[0] > output [1])
+                       swap(output[0],output[1]);
+       }
+       else
+       {
+               REAL split=output[(size*rand())/(RAND_MAX+1)];
+               //REAL split=output[size/2];
+
+               INT left=0;
+               INT right=size-1;
+
+               while (left<=right)
+               {
+                       while (output[left] < split)
+                               left++;
+                       while (output[right] > split)
+                               right--;
+
+                       if (left<=right)
+                       {
+                               swap(output[left],output[right]);
+                               left++;
+                               right--;
+                       }
+               }
+
+               if (right+1> 1)
+                       qsort(output,right+1);
+
+               if (size-left> 1)
+                       qsort(&output[left],size-left);
+       }
+}
+
+template <class T1,class T2>
+void CMath::qsort(T1* output, T2* index, INT size)
+{
+       if (size==2)
+       {
+               if (output[0] > output [1]){
+                       swap(output[0],output[1]);
+                       swap(index[0],index[1]);
+               }
+               
+       }
+       else
+       {
+               REAL split=output[(size*rand())/(RAND_MAX+1)];
+               //REAL split=output[size/2];
+               
+               INT left=0;
+               INT right=size-1;
+               
+               while (left<=right)
+               {
+                       while (output[left] < split)
+                               left++;
+                       while (output[right] > split)
+                               right--;
+                       
+                       if (left<=right)
+                       {
+                               swap(output[left],output[right]);
+                               swap(index[left],index[right]);
+                               left++;
+                               right--;
+                       }
+               }
+               
+               if (right+1> 1)
+                       qsort(output,index,right+1);
+               
+               if (size-left> 1)
+                       qsort(&output[left],&index[left], size-left);
+       }
+}
+
+template <class T1,class T2>
+void CMath::qsort_backward(T1* output, T2* index, INT size)
+{
+       if (size==2)
+       {
+               if (output[0] < output [1]){
+                       swap(output[0],output[1]);
+                       swap(index[0],index[1]);
+               }
+               
+       }
+       else
+       {
+               REAL split=output[(size*rand())/(RAND_MAX+1)];
+               //REAL split=output[size/2];
+               
+               INT left=0;
+               INT right=size-1;
+               
+               while (left<=right)
+               {
+                       while (output[left] > split)
+                               left++;
+                       while (output[right] < split)
+                               right--;
+                       
+                       if (left<=right)
+                       {
+                               swap(output[left],output[right]);
+                               swap(index[left],index[right]);
+                               left++;
+                               right--;
+                       }
+               }
+               
+               if (right+1> 1)
+                       qsort(output,index,right+1);
+               
+               if (size-left> 1)
+                       qsort(&output[left],&index[left], size-left);
+       }
+}
+
+template <class T> 
+void CMath::nmin(REAL* output, T* index, INT size, INT n)
+{
+       if (6*n*size<13*size*log((double)size))
+               for (INT i=0; i<n; i++)
+                       min(&output[i], &index[i], size-i) ;
+       else
+               qsort(output, index, size) ;
+}
+
+template <class T>
+void CMath::min(REAL* output, T* index, INT size)
+{
+       if (size<=0)
+               return ;
+       REAL min_elem = output[0] ;
+       INT min_index = 0 ;
+       for (INT i=1; i<size; i++)
+               if (output[i]<min_elem)
+               {
+                       min_index=i ;
+                       min_elem=output[i] ;
+               }
+       swap(output[0], output[min_index]) ;
+       swap(index[0], index[min_index]) ;
+}
+#endif
diff --git a/QPalmaDP/QPalmaDP.i b/QPalmaDP/QPalmaDP.i
new file mode 100644 (file)
index 0000000..05dd99c
--- /dev/null
@@ -0,0 +1,46 @@
+%module QPalmaDP
+%{
+#include "qpalma_dp.h"
+%}
+
+%include "std_vector.i"
+%include "std_string.i"
+
+%include "carrays.i"
+
+%include "qpalma_dp.h"
+
+
+%array_functions(int, intArray) ;
+%array_functions(double, doubleArray) ;
+%array_class(Pre_score, Pre_scoreArray) ;
+
+%pythoncode
+%{
+
+def createDoubleArrayFromList(list):
+   array = new_doubleArray(len(list))
+   for i in range(len(list)):
+      doubleArray_setitem(array, i, list[i])
+   return array
+
+def createIntArrayFromList(list):
+   array = new_intArray(len(list))
+   for i in range(len(list)):
+      intArray_setitem(array, i, list[i])
+   return array
+
+def createListFromIntArray(array, array_len):
+   list = [0]*array_len
+   for i in range(array_len):
+      list[i] = intArray_getitem(array,i)
+   return list
+
+def createListFromDoubleArray(array, array_len):
+   list = [0]*array_len
+   for i in range(array_len):
+      list[i] = doubleArray_getitem(array,i)
+   return list
+
+%} 
+
diff --git a/QPalmaDP/common.h b/QPalmaDP/common.h
new file mode 100644 (file)
index 0000000..61776e9
--- /dev/null
@@ -0,0 +1,160 @@
+#ifndef __COMMON_H__
+#define __COMMON_H__
+
+#include <stdlib.h> 
+#include <stdio.h> 
+#include "config.h"
+
+#ifdef SUNOS
+#define bool int
+#define false 0
+#define true 1
+#endif
+
+#ifndef LINUX
+#define RANDOM_MAX 2147483647
+#else
+#define RANDOM_MAX RAND_MAX
+#endif
+
+/**@name Standard Types 
+ * Definition of Platform independent Types
+*/
+//@{
+
+/// Type CHAR
+typedef char CHAR;
+typedef CHAR* P_CHAR;
+
+/// Type BYTE 
+typedef unsigned char BYTE;
+typedef BYTE* P_BYTE;
+
+/// Type SHORT is 2 bytes in size
+typedef short int SHORT;
+typedef SHORT* P_SHORT;
+
+/// Type WORD is 2 bytes in size
+typedef unsigned short int WORD;
+typedef WORD* P_WORD;
+
+/// Type INT is 4 bytes in size
+typedef int INT;
+typedef INT* P_INT;
+
+/// Type INT is 4 bytes in size
+typedef unsigned int UINT;
+typedef UINT* P_UINT;
+
+/// Type LONG is 8 bytes in size
+typedef long LONG;
+typedef LONG* P_LONG;
+
+/// Type SHORTREAL is 4 bytes in size
+typedef float SHORTREAL;
+typedef SHORTREAL* P_SHORTREAL;
+
+/// Type REAL is 8 bytes in size
+typedef double REAL;
+typedef REAL* P_REAL;
+
+/// Type LONGREAL is 16 bytes in size
+//typedef long double LONGREAL;
+//typedef LONGREAL* P_LONGREAL;
+
+#ifdef USE_SHORTREAL_KERNELCACHE
+       typedef SHORTREAL KERNELCACHE_ELEM;
+#else
+       typedef REAL KERNELCACHE_ELEM;
+#endif
+
+typedef KERNELCACHE_ELEM P_KERNELCACHE_ELEM;
+
+typedef LONG KERNELCACHE_IDX;
+
+/// The io libs output [DEBUG] etc in front of every CIO::message
+/// 'higher' messages filter output depending on the loglevel, i.e. CRITICAL messages
+/// will print all M_CRITICAL TO M_EMERGENCY messages to
+enum EMessageType
+{
+       M_DEBUG,
+       M_INFO,
+       M_NOTICE,
+       M_WARN,
+       M_ERROR,
+       M_CRITICAL,
+       M_ALERT,
+       M_EMERGENCY,
+       M_MESSAGEONLY,
+       M_PROGRESS
+};
+
+enum EKernelType
+{
+       K_UNKNOWN = 0,
+       K_OPTIMIZABLE = 4096,
+       K_LINEAR = 10 | K_OPTIMIZABLE,
+       K_POLY  = 20,
+       K_GAUSSIAN = 30,
+       K_HISTOGRAM = 40,
+       K_SALZBERG = 41,
+       K_LOCALITYIMPROVED = 50,
+       K_SIMPLELOCALITYIMPROVED = 60,
+       K_FIXEDDEGREE = 70,
+       K_WEIGHTEDDEGREE =    80 | K_OPTIMIZABLE,
+       K_WEIGHTEDDEGREEPOS = 81 | K_OPTIMIZABLE,
+       K_WEIGHTEDDEGREEPOLYA =    82,
+       K_WD = 83,
+       K_COMMWORD = 90 | K_OPTIMIZABLE ,
+       K_POLYMATCH = 100,
+       K_ALIGNMENT = 110,
+       K_COMMWORDSTRING = 120 | K_OPTIMIZABLE,
+       K_SPARSENORMSQUARED = 130,
+       K_COMBINED = 140 | K_OPTIMIZABLE
+};
+
+enum EFeatureType
+{
+       F_UNKNOWN = 0,
+       F_REAL = 10,
+       F_SHORT = 20,
+       F_CHAR = 30,
+       F_INT = 40,
+       F_BYTE = 50,
+       F_WORD = 60
+};
+
+enum EFeatureClass
+{
+       C_UNKNOWN = 0,
+       C_SIMPLE = 10,
+       C_SPARSE = 20,
+       C_STRING = 30,
+       C_COMBINED = 40
+};
+
+/// Alphabet of charfeatures/observations
+enum E_ALPHABET
+{
+       /// DNA - letters A,C,G,T,*,N,n
+       DNA=0,
+
+       /// PROTEIN - letters a-z
+       PROTEIN=1,
+
+       /// ALPHANUM - [0-9a-z]
+       ALPHANUM=2,
+
+       /// CUBE - [1-6]
+       CUBE=3,
+
+       /// NONE - type has no alphabet
+       NONE=4
+};
+
+//@}
+
+#define TMP_DIR "/tmp/"
+//#define TMP_DIR "/short/x46/tmp/"
+
+#endif
diff --git a/QPalmaDP/config.h b/QPalmaDP/config.h
new file mode 100644 (file)
index 0000000..88b448b
--- /dev/null
@@ -0,0 +1,19 @@
+
+#define HAVE_MATLAB 1
+
+#define HAVE_LARGEFILE 1
+
+
+
+
+
+
+#define USE_BIGSTATES 1
+
+
+#define USE_HMMCACHE 1
+
+
+
+
+
diff --git a/QPalmaDP/fill_matrix.cpp b/QPalmaDP/fill_matrix.cpp
new file mode 100644 (file)
index 0000000..e36af54
--- /dev/null
@@ -0,0 +1,421 @@
+/*********************************************************************************************/
+// Version of fill_matrix with fills several alignment matrices (best, second_best ...)
+// nr_paths det. number of these matrices
+//
+// est_len: Number of columns in alignment matrix: length(EST) +1 (for the gap) (variable: i)
+// dna_len: Number of lines in alignment matrix: length(DNA) +1 (for the gap) (variable: j)
+//
+// !!! IMPORTANT !!! The pictures and equations in the paper/poster etc. describe the algorithm
+// for an alignment matrix where DNA and EST changed places !!! IMPORTANT !!!
+/*********************************************************************************************/
+
+/*
+  matchmatrix: columnwise
+    - A C G T N (dna)
+  - x x x x x x
+  A x x x x x x        
+  C x x x x x x
+  G x x x x x x
+  T x x x x x x
+  N x x x x x x
+(est)
+*/
+
+/*
+  alignment matrix: columnwise
+   |-EST . . .
+  -+------------
+  -|00 ...
+  D|0.
+  N|. .
+  A|.  .
+  .|.
+  .|
+  .|
+*/
+
+
+/* TODO
+   - max_len = 10000 for elegans!
+   - do not use double
+
+*/
+
+
+#include "fill_matrix.h"
+
+using namespace std;
+
+int check_char(char base)
+{
+  switch(base)
+  {
+  case 'A':
+  case 'a': return 1;
+  case 'C':    
+  case 'c': return 2;
+  case 'G':
+  case 'g': return 3;
+  case 'T':
+  case 't': return 4;
+  case 'N':
+  case 'n': return 5; 
+  default : return 0;
+  }
+  
+}
+
+
+
+void fill_matrix(int nr_paths, Pre_score* matrices[], int est_len /*counter var: i*/, int dna_len /*counter var: j*/, char* est, char* dna, penalty_struct* functions , double* matchmatrix, double* donor, double* acceptor, bool remove_duplicate_scores, int nr_donor_sites, int* donor_sites, int* max_score_positions)
+{      
+
+  /*********************************************************************************************/
+  // variables
+  /*********************************************************************************************/
+  const int mlen = 6; // length of matchmatrix
+  int estChar, dnaChar; // 0:'-', 1:'a', 2:'c', 3:'g', 4:'t', 5:'n'
+  double prevValue ;
+  double tempValue;
+  int max_len = (int)functions->limits[functions->len-1] ; //last (len) supporting point of h
+  //printf("max_len: %d\n", max_len) ;
+
+  int num_elem ; // counter variable
+
+  const int max_num_elem = (4+nr_paths+max_len) * nr_paths ; // upper bound on num_elem
+  double *scores=new double[max_num_elem] ; //are sorted later
+  struct ArrayElem *array=new ArrayElem[max_num_elem] ;
+  //printf("%d\n", max_num_elem*sizeof(ArrayElem)) ;
+  
+  int max_ss_element[nr_paths*nr_paths]; //position on dna with max ss score for introns of length greater than max_len
+  int splice_pos ; //splice position get from donor_sites
+  int start_splice_pos ; //where to start looking in donor_sites
+
+  double max_scores[nr_paths] ;
+
+  /*********************************************************************************************/
+  // Initialisation of all alignment matrices, columnwise
+  /*********************************************************************************************/
+  for (int z=0; z<nr_paths; z++) /* matrice z */
+  {
+    max_scores[z] = log(0.0) ;
+    max_score_positions[2*z]    = 0; // pos i
+    max_score_positions[2*z +1] = 0; // pos j
+
+    for (int i=0; i<est_len; i++) /* i.column */
+    {
+      for (int j=0; j<dna_len; j++) /* j.line */
+      {
+       ((Pre_score*)matrices[z] +(i*dna_len) +j)->value = log(0.0); // -inf
+       ((Pre_score*)matrices[z] +(i*dna_len) +j)->prev_i = 0;
+       ((Pre_score*)matrices[z] +(i*dna_len) +j)->prev_j = 0;
+       ((Pre_score*)matrices[z] +(i*dna_len) +j)->prev_matrix_no = 0;
+       ((Pre_score*)matrices[z] +(i*dna_len) +j)->issplice = 0;
+      }
+    }
+  }
+
+       
+  /*********************************************************************************************/      
+  // Element (0,0) for first/best matrice (z=0)
+  /*********************************************************************************************/       
+  ((Pre_score*)matrices[0])->value = 0;
+  ((Pre_score*)matrices[0])->prev_i = 0;
+  ((Pre_score*)matrices[0])->prev_j = 0;
+  ((Pre_score*)matrices[0])->prev_matrix_no = 0;
+  ((Pre_score*)matrices[0])->issplice = 0;
+
+  /*********************************************************************************************/      
+  // Line Zero for first/best matrice (z=0)
+  /*********************************************************************************************/
+  for (int i=1; i<est_len; i++) /* 0.line, all columns (DNA='-') */
+  {    
+    ((Pre_score*)matrices[0] +(i*dna_len))->value = 0 ;                
+    ((Pre_score*)matrices[0] +(i*dna_len))->prev_i = i-1;
+    ((Pre_score*)matrices[0] +(i*dna_len))->prev_j = 0;    
+    ((Pre_score*)matrices[0] +(i*dna_len))->prev_matrix_no = 0;
+    ((Pre_score*)matrices[0] +(i*dna_len))->issplice = 0;
+  }
+
+
+  /*********************************************************************************************/      
+  // Column Zero for first/best matrice (z=0)
+  /*********************************************************************************************/      
+  for (int j=1; j<dna_len; j++) // 0.column, all lines
+  {      
+    ((Pre_score*)matrices[0] +j)->value =  0 ;
+    ((Pre_score*)matrices[0] +j)->prev_i = 0 ;
+    ((Pre_score*)matrices[0] +j)->prev_j = j-1 ;
+    ((Pre_score*)matrices[0] +j)->prev_matrix_no = 0 ;
+    ((Pre_score*)matrices[0] +j)->issplice = 0;
+  }
+
+   
+  /*********************************************************************************************/      
+  // !!!! Column Zero and line Zero for all other matrices except the best one (z>0): -INF !!!! 
+  /*********************************************************************************************/      
+
+
+  
+  /*********************************************************************************************/
+  // all other entries for all matrices, last column and row treated seperately
+  /*********************************************************************************************/
+  
+  /* columnwise, 0.column and 0.line already filled */
+  for (int i=1; i<est_len; i++) /* i.column */
+  {
+    
+    start_splice_pos = 0 ; //where to start looking in donor_sites
+    for (int zz=0; zz<nr_paths*nr_paths; zz++) {max_ss_element[zz]=0 ;} // initializing
+  
+    for (int j=1; j<dna_len; j++) //j.line
+    {
+      dnaChar = check_char(dna[j-1]) ; // -1 because of of the gaps in the 0.line/column of the al-matrix
+      estChar = check_char(est[i-1]) ;
+
+      
+      //if introns of length greater than max_len possible
+      if (j > max_len)
+      {
+       //storing highest ss_score (the position) and NOT -INF
+       int new_ss_pos = j-max_len ;
+       double donor_score = donor[new_ss_pos-1] ;
+       if (isfinite(donor_score)) //splice site!
+        {
+         if (max_ss_element[nr_paths-1] != 0)
+          { 
+           for (int zz=0; zz<nr_paths; zz++) //for each alignment matrix
+            {
+             int pos_with_minscore = nr_paths ; //assuming, the new score is the worst score
+             double new_intron_score = donor_score+((Pre_score*)matrices[zz]+(i*dna_len)+new_ss_pos-1)->value ; //could be -INF
+             if (!(isfinite(new_intron_score))) {printf("new_intron_score is -INF\n") ;}
+             for (int pos=0; pos<nr_paths; pos++) //find worst score of all nr_paths+1 scores
+              {
+               int act_splice_position = max_ss_element[zz*nr_paths+pos] ;
+               assert(isfinite(donor[act_splice_position-1])) ;
+               double act_intron_score = donor[act_splice_position-1]+((Pre_score*)matrices[zz]+(i*dna_len)+act_splice_position-1)->value ;
+               if (new_intron_score >= act_intron_score) 
+                {
+                 new_intron_score = act_intron_score ;
+                 pos_with_minscore = pos ;
+               } //else nothing happens, new intron score still the worst one
+             }
+             if (pos_with_minscore != nr_paths)
+              {
+               max_ss_element[zz*nr_paths+pos_with_minscore] = new_ss_pos ; //replacing
+             }
+           }
+         }
+         else
+          {
+           for (int pos=0; pos<nr_paths; pos++)
+            {
+             if (max_ss_element[pos]==0)  //no previous splice site at this pos
+              {
+               for (int zz=0; zz<nr_paths; zz++) {max_ss_element[zz*nr_paths+pos]=j-max_len ;}
+               break ;
+             }
+           } 
+         }
+       }
+       //start_pos in donor_sites so that only introns shorter then max_length are seen
+       if ((start_splice_pos < nr_donor_sites) && donor_sites[start_splice_pos] == j-max_len)
+        {
+         start_splice_pos++ ;
+       }
+      }
+      
+
+      /* Computing all possible scores */
+      num_elem = 0 ; /* number of different scores/possibilities so far */
+      for (int z=0; z<nr_paths; z++) /* matrice z */
+      {
+       Pre_score* actMatrix = (Pre_score*)matrices[z]; /* hm, why?  */
+
+       /***************************************/
+       // 0. Zero (local alignment version)
+        /***************************************/
+       assert(num_elem<max_num_elem) ;
+       array[num_elem].prev_i = 0; 
+       array[num_elem].prev_j = 0; 
+       array[num_elem].prev_matrix_no = z;
+       array[num_elem].isSplice = false; /* no splice site */
+       scores[num_elem] = 0 ;
+       num_elem++ ;
+
+
+        /***************************************/
+       // 1. gap on EST (j-1,i) -> (j,i) 
+        /***************************************/
+       prevValue = ((Pre_score*)actMatrix +(i*dna_len)+j-1)->value ; /* (j-1,i) -> (j,i) */
+       if (isfinite(prevValue))
+       {
+         tempValue = prevValue +(matchmatrix[mlen* dnaChar]); /* score(gap,DNA) */
+         if (isfinite(tempValue))
+         {
+           assert(num_elem<max_num_elem) ;
+           array[num_elem].prev_i = i; 
+           array[num_elem].prev_j = j-1; /* predecessor */
+           array[num_elem].prev_matrix_no = z;
+           array[num_elem].isSplice = false; /* no splice site */
+           scores[num_elem] = -tempValue ;
+           num_elem++ ;
+         }
+       }
+      
+
+       /***************************************/
+       // 2. match/mismatch (j-1,i-1) -> (j,i)
+       /***************************************/
+       prevValue = ((Pre_score*)actMatrix +(i-1)*dna_len + j-1)->value ; /* (j-1,i-1) -> (j,i) */
+       if (isfinite(prevValue))
+       {
+         tempValue = prevValue +(matchmatrix[mlen* dnaChar +estChar]);//DNA mit EST
+         if (isfinite(tempValue))
+         {
+           assert(num_elem<max_num_elem) ;
+           array[num_elem].prev_i = i-1; /* predecessor */
+           array[num_elem].prev_j = j-1; /* predecessor */
+           array[num_elem].prev_matrix_no = z;
+           array[num_elem].isSplice = false; /* no splice site */
+           scores[num_elem] = -tempValue ;
+           num_elem++ ;
+         }
+       }
+       
+
+       /***************************************/
+       // 3. gap on DNA (j,i-1) -> (j,i)
+       /***************************************/
+       prevValue = ((Pre_score*)actMatrix +(i-1)*dna_len + j)->value ;
+       if (isfinite(prevValue))
+       {
+         tempValue = prevValue +(matchmatrix[estChar]); /* score(EST,gap) */
+         if (isfinite(tempValue))
+         {
+           assert(num_elem<max_num_elem) ;
+           array[num_elem].prev_i = i-1; /* predecessor */
+           array[num_elem].prev_j = j; 
+           array[num_elem].prev_matrix_no = z;
+           array[num_elem].isSplice = false;  /* no splice site */
+           scores[num_elem]=-tempValue ;
+           num_elem++ ;
+         }
+         
+       }
+
+
+       /***************************************/
+       // 4. all possible splice sites/introns
+       /***************************************/
+       // acceptor[j-1]: g of ag (-1 because cpp starts counting at 0)
+       // donor[splice_pos-1]: g of gt/gc (-1 because cpp starts counting at 0)
+       // donor_sites: the g's of the gt/gc
+       
+       if (isfinite(acceptor[j-1]))
+       {
+         
+         //the nr_paths many introns of length greater than max_len
+         for (int pos=0; pos<nr_paths; pos++)
+          {
+           if (max_ss_element[z*nr_paths+pos]>0)
+            {
+             assert((j-max_ss_element[z*nr_paths+pos]+1) > max_len) ;
+             splice_pos = max_ss_element[z*nr_paths+pos] ; //the g of the gt 
+             prevValue = ((Pre_score*)actMatrix + (i*dna_len) +splice_pos-1)->value ; //one before the _gt
+             if (isfinite(prevValue) && (((Pre_score*)actMatrix + (i*dna_len) +splice_pos-1)->issplice!=1))
+              {
+               int dist = j -(splice_pos-1); //_gt...ag: dist from g to g
+               assert(dist>max_len) ;
+               tempValue = lookup_penalty(functions,dist, NULL, 0) +donor[splice_pos-1] +acceptor[j-1] +prevValue;
+               if (isfinite(tempValue))
+                {
+                 assert(num_elem<max_num_elem) ;
+                 array[num_elem].prev_i = i; 
+                 array[num_elem].prev_j = splice_pos-1; // predecessor
+                 array[num_elem].prev_matrix_no = z;
+                 array[num_elem].isSplice = true; // splice site
+                 scores[num_elem]=-tempValue ;
+                 num_elem++ ;
+               }
+               else if (isfinite(lookup_penalty(functions,dist, NULL, 0)))
+                {
+                 printf("Something wrong with donor_sites(1000er) (intron_length: %d)\n", dist) ;
+                 sleep(5000) ;
+               }
+             }
+           }
+         }
+
+         
+
+          //all introns of length <= max_len
+          for (int k=start_splice_pos; k < nr_donor_sites; k++) 
+         //for (int k=0; k < nr_donor_sites; k++) //also intron of length greater than 1000
+         {   
+           splice_pos = donor_sites[k] ;
+           if (splice_pos > j-4) { break ;}
+           prevValue = ((Pre_score*)actMatrix + (i*dna_len) +splice_pos-1)->value ; //one before the _gt
+           
+           if (isfinite(prevValue) && (((Pre_score*)actMatrix + (i*dna_len) +splice_pos-1)->issplice!=1))
+           {
+             int dist = j -(splice_pos-1); //_gt...ag: dist from g to g
+             assert(dist<=max_len) ;
+             tempValue = lookup_penalty(functions,dist, NULL, 0) +donor[splice_pos-1] +acceptor[j-1] +prevValue;
+             if (isfinite(tempValue))
+             {
+               assert(num_elem<max_num_elem) ;
+               array[num_elem].prev_i = i; 
+               array[num_elem].prev_j = splice_pos-1; /* predecessor */
+               array[num_elem].prev_matrix_no = z;
+               array[num_elem].isSplice = true; /* splice site */
+               scores[num_elem]=-tempValue ;
+               num_elem++ ;
+             }
+             else if (isfinite(lookup_penalty(functions,dist, NULL, 0)))
+             {
+               printf("Something wrong with donor_sites (intron_length: %d)\n", dist) ;
+               sleep(5000) ;
+             }
+           }
+         }
+       }
+
+      } //end of z
+      
+      CMath::nmin<struct ArrayElem>(scores, array, num_elem, nr_paths);
+      
+      /* the nr_paths many highest scores are written in the nr_paths many matrices*/
+      for (int z=0; z<nr_paths; z++) {
+       if (z>=num_elem) {
+         //nothing happens, entry in matrices[z] at pos. (i,j) still -inf
+       }
+       else {
+         ((Pre_score*)matrices[z] + (i*dna_len) +j)->value = -scores[z] ;
+         ((Pre_score*)matrices[z] + (i*dna_len) +j)->prev_i = array[z].prev_i ;
+         ((Pre_score*)matrices[z] + (i*dna_len) +j)->prev_j = array[z].prev_j ;
+         ((Pre_score*)matrices[z] + (i*dna_len) +j)->issplice = array[z].isSplice ;
+         ((Pre_score*)matrices[z] + (i*dna_len) +j)->prev_matrix_no = array[z].prev_matrix_no;
+
+         if (-scores[z]>=max_scores[z]) { //storing the postion of the highest alignment score yet
+           max_scores[z] = -scores[z] ;
+           max_score_positions[2*z]    = i; // pos i
+           max_score_positions[2*z +1] = j; // pos j
+         }
+       }
+      } //end of z
+
+    } //end of j
+  } // end of i
+
+
+
+  //free memory
+  delete[] array;
+  delete[] scores;
+  
+  return;
+}
+
+
diff --git a/QPalmaDP/fill_matrix.h b/QPalmaDP/fill_matrix.h
new file mode 100644 (file)
index 0000000..b0f169f
--- /dev/null
@@ -0,0 +1,51 @@
+#ifndef _FILLMATRIX_H__
+#define _FILLMATRIX_H__
+
+#include "Mathmatics.h"
+#include "penalty_info.h"
+#include <algorithm> //Eigentlich in Header
+#include <assert.h>
+#include <ctype.h>
+#include <iostream>
+#include <math.h>
+#include <stdlib.h> 
+#include <string.h>
+
+
+struct ArrayElem { //24B
+  double score;
+  int prev_j;
+  int prev_i;
+  int prev_matrix_no;   
+  bool isSplice;
+} ;
+
+struct pre_score { //24B
+  double value; //8
+  int prev_i; //4
+  int prev_j; //4
+  int prev_matrix_no;//4
+  bool issplice;//4
+};
+
+typedef struct pre_score Pre_score;
+
+typedef struct Align_pair { //24B
+  double value ; 
+  bool issplice;
+  int previ,prevj ;
+  char est_char;
+  char dna_char;
+};
+
+
+void fill_matrix (int nr_paths, Pre_score* matrices[], int len_est, int len_dna, char* est, char* dna, penalty_struct* functions, double* matchmatrix, double* donor, double* acceptor, bool remove_duplicate_scores, int nr_donor_sites, int* donor_sites, int* max_score_positions);
+// funktion aendern in void result_align(Pre_score* matrices[], int anzpath,
+
+int check_char(char base);
+
+bool result_align (Pre_score* matrices[], int matrixnr, int i,  int j, int* result_length_ptr, char* est, char* dna, int* s_align, int* e_align, int* m_param, double* alignmentscores, int* max_score_positions );
+
+extern void print_align(Pre_score* matrix, int length_est,  int length_dna, Align_pair* vektor, int result_length, int print_matrix);
+
+#endif // _FILLMATRIX_H__
diff --git a/QPalmaDP/io.cpp b/QPalmaDP/io.cpp
new file mode 100644 (file)
index 0000000..5ae3e1c
--- /dev/null
@@ -0,0 +1,96 @@
+#include "io.h"
+#include "common.h"
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <ctype.h>
+
+FILE* CIO::target=stdout;
+
+CIO::CIO()
+{
+}
+
+void CIO::message(EMessageType prio, const CHAR *fmt, ... )
+{
+       check_target();
+       print_message_prio(prio, target);
+    va_list list;
+    va_start(list,fmt);
+    vfprintf(target,fmt,list);
+    va_end(list);
+    fflush(target);
+}
+
+void CIO::buffered_message(EMessageType prio, const CHAR *fmt, ... )
+{
+       check_target();
+       print_message_prio(prio, target);
+    va_list list;
+    va_start(list,fmt);
+    vfprintf(target,fmt,list);
+    va_end(list);
+}
+
+CHAR* CIO::skip_spaces(CHAR* str)
+{
+       INT i=0;
+
+       if (str)
+       {
+               for (i=0; isspace(str[i]); i++);
+
+               return &str[i];
+       }
+       else 
+               return str;
+}
+
+void CIO::set_target(FILE* t)
+{
+       target=t;
+}
+
+void CIO::check_target()
+{
+       if (!target)
+               target=stdout;
+}
+
+void CIO::print_message_prio(EMessageType prio, FILE* target)
+{
+       switch (prio)
+       {
+               case M_DEBUG:
+                       fprintf(target, "[DEBUG] ");
+                       break;
+               case M_PROGRESS:
+                       fprintf(target, "[PROGRESS] ");
+                       break;
+               case M_INFO:
+                       //fprintf(target, "[INFO] ");
+                       break;
+               case M_NOTICE:
+                       fprintf(target, "[NOTICE] ");
+                       break;
+               case M_WARN:
+                       fprintf(target, "[WARN] ");
+                       break;
+               case M_ERROR:
+                       fprintf(target, "[ERROR] ");
+                       break;
+               case M_CRITICAL:
+                       fprintf(target, "[CRITICAL] ");
+                       break;
+               case M_ALERT:
+                       fprintf(target, "[ALERT] ");
+                       break;
+               case M_EMERGENCY:
+                       fprintf(target, "[EMERGENCY] ");
+                       break;
+               case M_MESSAGEONLY:
+                       break;
+               default:
+                       break;
+       }
+}
diff --git a/QPalmaDP/io.h b/QPalmaDP/io.h
new file mode 100644 (file)
index 0000000..c127f92
--- /dev/null
@@ -0,0 +1,33 @@
+#ifndef __CIO_H__
+#define __CIO_H__
+
+#include "common.h"
+
+#include <stdio.h>
+#include <stdarg.h>
+
+class CIO
+{
+public:
+       CIO();
+
+       static void set_target(FILE* target);
+       static void message(EMessageType prio, const CHAR *fmt, ... );
+
+       inline static void not_implemented() 
+       {
+               message(M_ERROR, "Sorry, not yet implemented\n");
+       }
+
+       static void buffered_message(EMessageType prio, const CHAR *fmt, ... );
+
+       static CHAR* skip_spaces(CHAR* str);
+
+protected:
+       static void check_target();
+       static void print_message_prio(EMessageType prio, FILE* target);
+
+protected:
+       static FILE* target;
+};
+#endif
diff --git a/QPalmaDP/penalty_info.cpp b/QPalmaDP/penalty_info.cpp
new file mode 100644 (file)
index 0000000..8095ca1
--- /dev/null
@@ -0,0 +1,354 @@
+#include <assert.h>
+#include "config.h"
+//#include "features/CharFeatures.h"
+//#include "features/StringFeatures.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include "io.h"
+
+
+#include "fill_matrix.h"
+#include "penalty_info.h"
+
+void init_penalty_struct(struct penalty_struct &PEN)
+{
+       PEN.limits=NULL ;
+       PEN.penalties=NULL ;
+       PEN.id=-1 ;
+       PEN.next_pen=NULL ;
+       PEN.transform = T_LINEAR ;
+       PEN.name = NULL ;
+       PEN.max_len=0 ;
+       PEN.min_len=0 ;
+       PEN.cache=NULL ;
+       PEN.use_svm=0 ;
+}
+
+void init_penalty_struct_cache(struct penalty_struct &PEN)
+{
+       if (PEN.cache || PEN.use_svm)
+               return ;
+               
+       REAL* cache=new REAL[PEN.max_len+1] ;
+       if (cache)
+       {
+               for (INT i=0; i<=PEN.max_len; i++)
+                       cache[i] = lookup_penalty(&PEN, i, 0, false) ;
+               PEN.cache = cache ;
+       }
+}
+
+void delete_penalty_struct(struct penalty_struct &PEN)
+{
+       if (PEN.id!=-1)
+       {
+               delete[] PEN.limits ;
+               delete[] PEN.penalties ;
+               delete[] PEN.name ;
+               delete[] PEN.cache ;
+       }
+}
+
+void delete_penalty_struct_array(struct penalty_struct *PEN, INT len)
+{
+       for (int i=0; i<len; i++)
+               delete_penalty_struct(PEN[i]) ;
+       delete[] PEN ;
+}
+
+
+//struct penalty_struct * read_penalty_struct_from_cell(const mxArray * mx_penalty_info, int &P)
+//{
+//     P = mxGetN(mx_penalty_info) ;
+//     
+//     struct penalty_struct * PEN = new struct penalty_struct[P] ;
+//     for (INT i=0; i<P; i++)
+//              init_penalty_struct(PEN[i]) ;
+//     
+//     for (INT i=0; i<P; i++)
+//     {
+//       const mxArray* mx_elem = mxGetCell(mx_penalty_info, i) ;
+//       if (mx_elem==NULL || !mxIsStruct(mx_elem))
+//         {
+//           CIO::message(M_ERROR, "empty cell element\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         } ;
+//       const mxArray* mx_id_field = mxGetField(mx_elem, 0, "id") ;
+//       if (mx_id_field==NULL || !mxIsNumeric(mx_id_field) || 
+//           mxGetN(mx_id_field)!=1 || mxGetM(mx_id_field)!=1)
+//         {
+//           CIO::message(M_ERROR, "missing id field\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         }
+//       const mxArray* mx_limits_field = mxGetField(mx_elem, 0, "limits") ;
+//       if (mx_limits_field==NULL || !mxIsNumeric(mx_limits_field) ||
+//           mxGetM(mx_limits_field)!=1)
+//         {
+//           CIO::message(M_ERROR, "missing limits field\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         }
+//       INT len = mxGetN(mx_limits_field) ;
+//       
+//       const mxArray* mx_penalties_field = mxGetField(mx_elem, 0, "penalties") ;
+//       if (mx_penalties_field==NULL || !mxIsNumeric(mx_penalties_field) ||
+//           mxGetM(mx_penalties_field)!=1 || mxGetN(mx_penalties_field)!=len)
+//         {
+//           CIO::message(M_ERROR, "missing penalties field\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         }
+//       const mxArray* mx_transform_field = mxGetField(mx_elem, 0, "transform") ;
+//       if (mx_transform_field==NULL || !mxIsChar(mx_transform_field))
+//         {
+//           CIO::message(M_ERROR, "missing transform field\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         }
+//       const mxArray* mx_name_field = mxGetField(mx_elem, 0, "name") ;
+//       if (mx_name_field==NULL || !mxIsChar(mx_name_field))
+//         {
+//           CIO::message(M_ERROR, "missing name field\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         }
+//       const mxArray* mx_max_len_field = mxGetField(mx_elem, 0, "max_len") ;
+//       if (mx_max_len_field==NULL || !mxIsNumeric(mx_max_len_field) ||
+//           mxGetM(mx_max_len_field)!=1 || mxGetN(mx_max_len_field)!=1)
+//         {
+//           CIO::message(M_ERROR, "missing max_len field\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         }
+//       const mxArray* mx_min_len_field = mxGetField(mx_elem, 0, "min_len") ;
+//       if (mx_min_len_field==NULL || !mxIsNumeric(mx_min_len_field) ||
+//           mxGetM(mx_min_len_field)!=1 || mxGetN(mx_min_len_field)!=1)
+//         {
+//           CIO::message(M_ERROR, "missing min_len field\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         }
+//       const mxArray* mx_use_svm_field = mxGetField(mx_elem, 0, "use_svm") ;
+//       if (mx_use_svm_field==NULL || !mxIsNumeric(mx_use_svm_field) ||
+//           mxGetM(mx_use_svm_field)!=1 || mxGetN(mx_use_svm_field)!=1)
+//         {
+//           CIO::message(M_ERROR, "missing use_svm field\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         }
+//       INT use_svm = (INT) mxGetScalar(mx_use_svm_field) ;
+//             //fprintf(stderr, "use_svm_field=%i\n", use_svm) ;
+//       
+//       const mxArray* mx_next_id_field = mxGetField(mx_elem, 0, "next_id") ;
+//       if (mx_next_id_field==NULL || !mxIsNumeric(mx_next_id_field) ||
+//           mxGetM(mx_next_id_field)!=1 || mxGetN(mx_next_id_field)!=1)
+//         {
+//           CIO::message(M_ERROR, "missing next_id field\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         }
+//       INT next_id = (INT) mxGetScalar(mx_next_id_field)-1 ;
+//       
+//       INT id = (INT) mxGetScalar(mx_id_field)-1 ;
+//       if (i<0 || i>P-1)
+//         {
+//           CIO::message(M_ERROR, "id out of range\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         }
+//       INT max_len = (INT) mxGetScalar(mx_max_len_field) ;
+//       if (max_len<0 || max_len>1024*1024*100)
+//         {
+//           CIO::message(M_ERROR, "max_len out of range\n") ;
+//                     delete_penalty_struct_array(PEN,P) ;
+//                     return NULL ;
+//         }
+//       PEN[id].max_len = max_len ;
+//
+//       INT min_len = (INT) mxGetScalar(mx_min_len_field) ;
+//       if (min_len<0 || min_len>1024*1024*100)
+//         {
+//           CIO::message(M_ERROR, "min_len out of range\n") ;
+//                     delete_penalty_struct_array(PEN,P) ;
+//                     return NULL ;
+//         }
+//       PEN[id].min_len = min_len ;
+//       
+//       if (PEN[id].id!=-1)
+//         {
+//           CIO::message(M_ERROR, "penalty id already used\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         }
+//       PEN[id].id=id ;
+//       if (next_id>=0)
+//         PEN[id].next_pen=&PEN[next_id] ;
+//       //fprintf(stderr,"id=%i, next_id=%i\n", id, next_id) ;
+//       
+//       assert(next_id!=id) ;
+//       PEN[id].use_svm=use_svm ;
+//       PEN[id].limits = new REAL[len] ;
+//       PEN[id].penalties = new REAL[len] ;
+//       double * limits = mxGetPr(mx_limits_field) ;
+//       double * penalties = mxGetPr(mx_penalties_field) ;
+//       
+//       for (INT i=0; i<len; i++)
+//         {
+//           PEN[id].limits[i]=limits[i] ;
+//           PEN[id].penalties[i]=penalties[i] ;
+//         }
+//       PEN[id].len = len ;
+//       
+//       char *transform_str = mxArrayToString(mx_transform_field) ;                           
+//       char *name_str = mxArrayToString(mx_name_field) ;                             
+//       
+//       if (strcmp(transform_str, "log")==0)
+//         PEN[id].transform = T_LOG ;
+//       else if (strcmp(transform_str, "log(+1)")==0)
+//         PEN[id].transform = T_LOG_PLUS1 ;   
+//       else if (strcmp(transform_str, "log(+3)")==0)
+//         PEN[id].transform = T_LOG_PLUS3 ;   
+//       else if (strcmp(transform_str, "(+3)")==0)
+//         PEN[id].transform = T_LINEAR_PLUS3 ;        
+//       else if (strcmp(transform_str, "")==0)
+//         PEN[id].transform = T_LINEAR ;      
+//       else
+//         {
+//           delete_penalty_struct_array(PEN,P) ;
+//           mxFree(transform_str) ;
+//           return NULL ;
+//         }
+//       PEN[id].name = new char[strlen(name_str)+1] ;
+//       strcpy(PEN[id].name, name_str) ;
+//       
+//       init_penalty_struct_cache(PEN[id]) ;
+//         
+//       mxFree(transform_str) ;
+//       mxFree(name_str) ;
+//     }
+//     return PEN ;
+//}
+
+REAL lookup_penalty_svm(const struct penalty_struct *PEN, INT p_value, REAL *d_values)
+{      
+       if (PEN==NULL)
+               return 0 ;
+       assert(PEN->use_svm>0) ;
+       REAL d_value=d_values[PEN->use_svm-1] ;
+       //fprintf(stderr,"transform=%i, d_value=%1.2f\n", (INT)PEN->transform, d_value) ;
+       
+       switch (PEN->transform)
+       {
+       case T_LINEAR:
+               break ;
+       case T_LOG:
+               d_value = log(d_value) ;
+               break ;
+       case T_LOG_PLUS1:
+               d_value = log(d_value+1) ;
+               break ;
+       case T_LOG_PLUS3:
+               d_value = log(d_value+3) ;
+               break ;
+       case T_LINEAR_PLUS3:
+               d_value = d_value+3 ;
+               break ;
+       default:
+               CIO::message(M_ERROR, "unknown transform\n") ;
+               break ;
+       }
+       
+       INT idx = 0 ;
+       REAL ret ;
+       for (INT i=0; i<PEN->len; i++)
+               if (PEN->limits[i]<=d_value)
+                       idx++ ;
+       
+       if (idx==0)
+               ret=PEN->penalties[0] ;
+       else if (idx==PEN->len)
+               ret=PEN->penalties[PEN->len-1] ;
+       else
+       {
+               ret = (PEN->penalties[idx]*(d_value-PEN->limits[idx-1]) + PEN->penalties[idx-1]*
+                          (PEN->limits[idx]-d_value)) / (PEN->limits[idx]-PEN->limits[idx-1]) ;  
+       }
+       
+       //fprintf(stderr,"ret=%1.2f\n", ret) ;
+
+       if (PEN->next_pen)
+               ret+=lookup_penalty(PEN->next_pen, p_value, d_values);
+       
+       //fprintf(stderr,"ret=%1.2f\n", ret) ;
+       
+       return ret ;
+}
+
+REAL lookup_penalty(const struct penalty_struct *PEN, INT p_value, 
+                                       REAL* svm_values, bool follow_next)
+{      
+       if (PEN==NULL)
+               return 0 ;
+       if (PEN->use_svm)
+               return lookup_penalty_svm(PEN, p_value, svm_values) ;
+               
+       if ((p_value<PEN->min_len) || (p_value>PEN->max_len))
+               return -CMath::INFTY ;
+       
+       if (PEN->cache!=NULL && (p_value>=0) && (p_value<=PEN->max_len))
+       {
+               REAL ret=PEN->cache[p_value] ;
+               if (PEN->next_pen && follow_next)
+                       ret+=lookup_penalty(PEN->next_pen, p_value, svm_values);
+               return ret ;
+       }
+       
+       REAL d_value = (REAL) p_value ;
+       switch (PEN->transform)
+       {
+       case T_LINEAR:
+               break ;
+       case T_LOG:
+               d_value = log(d_value) ;
+               break ;
+       case T_LOG_PLUS1:
+               d_value = log(d_value+1) ;
+               break ;
+       case T_LOG_PLUS3:
+               d_value = log(d_value+3) ;
+               break ;
+       case T_LINEAR_PLUS3:
+               d_value = d_value+3 ;
+               break ;
+       default:
+               CIO::message(M_ERROR, "unknown transform\n") ;
+               break ;
+       }
+
+       INT idx = 0 ;
+       REAL ret ;
+       for (INT i=0; i<PEN->len; i++)
+               if (PEN->limits[i]<=d_value)
+                       idx++ ;
+       
+       if (idx==0)
+               ret=PEN->penalties[0] ;
+       else if (idx==PEN->len)
+               ret=PEN->penalties[PEN->len-1] ;
+       else
+       {
+               ret = (PEN->penalties[idx]*(d_value-PEN->limits[idx-1]) + PEN->penalties[idx-1]*
+                          (PEN->limits[idx]-d_value)) / (PEN->limits[idx]-PEN->limits[idx-1]) ;  
+       }
+       //if (p_value>=30 && p_value<150)
+       //fprintf(stderr, "%s %i(%i) -> %1.2f\n", PEN->name, p_value, idx, ret) ;
+       
+       if (PEN->next_pen && follow_next)
+               ret+=lookup_penalty(PEN->next_pen, p_value, svm_values);
+
+       return ret ;
+}
diff --git a/QPalmaDP/penalty_info.h b/QPalmaDP/penalty_info.h
new file mode 100644 (file)
index 0000000..2ed9ee1
--- /dev/null
@@ -0,0 +1,42 @@
+#ifndef __PENALTY_INFO_H__
+#define __PENALTY_INFO_H__
+
+#include "common.h"
+#include "Mathmatics.h"
+
+enum ETransformType
+{
+       T_LINEAR,
+       T_LOG,
+       T_LOG_PLUS1,
+       T_LOG_PLUS3,
+       T_LINEAR_PLUS3
+}  ;
+
+struct penalty_struct
+{
+       INT len ;
+       REAL *limits ;
+       REAL *penalties ;
+       INT max_len ;
+       INT min_len ;
+       REAL *cache ;
+       enum ETransformType transform ;
+       INT id ;
+       struct penalty_struct *next_pen ;
+       char * name ;
+       INT use_svm ;
+} ;
+
+void init_penalty_struct(struct penalty_struct &PEN) ;
+void delete_penalty_struct(struct penalty_struct &PEN) ;
+void delete_penalty_struct_array(struct penalty_struct *PEN, INT len) ;
+
+//#ifdef HAVE_MATLAB
+//struct penalty_struct * read_penalty_struct_from_cell(const mxArray * mx_penalty_info, mwSize &P) ;
+//#endif
+
+REAL lookup_penalty(const struct penalty_struct *PEN, INT p_value, 
+                                       REAL* svm_values, bool follow_next=true) ;
+
+#endif
diff --git a/QPalmaDP/print_align.cpp b/QPalmaDP/print_align.cpp
new file mode 100644 (file)
index 0000000..312cabc
--- /dev/null
@@ -0,0 +1,97 @@
+#include "fill_matrix.h"
+using namespace std;
+
+void print_align(Pre_score* matrix, int length_est,  int length_dna,Align_pair* vektor, int result_length, int print_matrix)
+  
+{
+  //Druck der matrix
+  switch(print_matrix)
+    {
+    case 1:
+      {
+       cout <<"Matrix: \n";
+       
+       for (int k = 0; k<length_est; k++)//k Zeile, l Spalte
+         {
+           cout << "Zeile " << k << ":\t"; 
+           for (int l= 0; l<length_dna; l++)
+             {
+               cout << ((Pre_score*)matrix + (k*length_dna) + l)->value <<"\t";  
+             } 
+           cout <<"\n";
+         }
+       break;  
+      }
+      
+    case 2:
+      {
+       cout <<"Matrix: \n";
+       
+       for (int k = 0; k<length_est; k++)//k Zeile, l Spalte
+         {
+           cout << "Zeile " << k << ":\t"; 
+           for (int l= 0; l<length_dna; l++)
+             {
+               if (((((Pre_score*)matrix + (k*length_dna) + l)->prev_i) == k-1) && ((((Pre_score*)matrix + (k*length_dna) + l)->prev_j) == l-1))
+                 cout << "\\" <<((Pre_score*)matrix + (k*length_dna) + l)->value <<"\t";  
+               else if (((((Pre_score*)matrix + (k*length_dna) + l)->prev_i) == k) && ((((Pre_score*)matrix + (k*length_dna) + l)->prev_j) == l-1))
+                 cout << "<"<< ((Pre_score*)matrix + (k*length_dna) + l)->value <<"\t";  
+               else if (((((Pre_score*)matrix + (k*length_dna) + l)->prev_i) == k -1) && ((((Pre_score*)matrix + (k*length_dna) + l)->prev_j) == l))
+                 cout << "|"<< ((Pre_score*)matrix + (k*length_dna) + l)->value <<"\t";  
+               else             { cout << ((Pre_score*)matrix + (k*length_dna) + l)->value << "("; 
+               cout << ((Pre_score*)matrix + (k*length_dna) + l)->prev_i << "," << ((Pre_score*)matrix + (k*length_dna) + l)->prev_j << ")\t";  
+               }
+               
+             } 
+           cout <<"\n";
+         }
+        
+       cout << "length_est" << length_est << "\n";
+        cout << "length_dna" << length_dna << "\n";
+        cout << "result_length: " << result_length << "\n";
+       
+       
+       break;  
+      } 
+      
+    case 3:
+      {
+       //Druck des Alignment
+       
+       // cout << "Alignment\n" ;
+       int j = 0;
+       // cout << "resultlength: " << result_length <<"\n";
+       while (j<result_length) {
+         int pacman = j;       
+         cout << "D:(" <<j<<")\t";
+         for (int i=0; ((j<result_length) && (i<50)); i++)
+           {
+             cout << vektor[j].dna_char << " ";        
+             j++;      
+           }
+         
+         cout << "\n"; 
+         
+         cout << "E:(" <<pacman<<")\t";
+         for (int i=0;((pacman<result_length) && (i<50)); i++)
+           {
+             cout << vektor[pacman].est_char << " "; 
+             pacman++;
+           }
+         cout << "\n\n";       
+       }               
+       
+       //Ende Druck Alignment
+       break ;
+      }
+      
+    default:
+      {
+       cout <<"";
+      }
+    }
+  
+  
+  
+}
+
diff --git a/QPalmaDP/qpalma_dp.cpp b/QPalmaDP/qpalma_dp.cpp
new file mode 100644 (file)
index 0000000..1e86e49
--- /dev/null
@@ -0,0 +1,321 @@
+#include "qpalma_dp.h"
+
+/*[splice_align, est_align, weightMatch, alignmentscores, dnaest] = ...
+  myalign([nr_paths], dna, est, {h}, matchmatrix, donor, acceptor, remove_duplicate_scores, ...
+  print_matrix) */
+
+/* nlhs - Number of expected mxArrays (Left Hand Side)
+   plhs - Array of pointers to expected outputs
+   nrhs - Number of inputs (Right Hand Side)
+   prhs - Array of pointers to input data. */
+
+  myalign([nr_paths], dna, est, {h}, matchmatrix, donor, acceptor, remove_duplicate_scores, ...
+  print_matrix) */
+
+void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
+{
+  
+  
+
+  /***************************************************************************/
+  // right number of arguments? 
+  /***************************************************************************/
+  if(nrhs != 9)
+  {
+    mexPrintf("10 input arguments please: \n [splice_align, est_align, weightMatch, alignmentscores, dnaest] = myalign(nr_paths, dna, est, cellarray, matchmatrix, donor, acceptor, remove_duplicate_scores, print_matrix)\n");
+    return ;
+  }
+  
+  if(nlhs != 5)
+  {
+    mexPrintf("5 output arguments please\n [splice_align, est_align, weightMatch, alignmentscores, dnaest] = myalign(nr_paths, dna, est, cellarray, matchmatrix, donor, acceptor, remove_duplicate_scores, print_matrix)\n");
+    return ;
+  }
+
+
+  /***************************************************************************/
+  // variables 
+  /***************************************************************************/
+  const int mlen = 6; // score matrix: length of 6 for "- A C G T N"
+  int arg = 0 ; // counter variable
+  int failure ; // 
+
+
+  /***************************************************************************/
+  // 0. nr_paths                              
+  /***************************************************************************/
+  int nr_paths = (int)*mxGetPr(prhs[arg]); // 0. input (double)
+  arg++; // arg -> 1
+
+  
+  /***************************************************************************/
+  // 1. dna
+  /***************************************************************************/
+  int dna_len = mxGetN(prhs[arg]) + 1; // mxGetN: number of columns (int)
+  char* dna = (char*)mxCalloc(dna_len, sizeof(char)); 
+  
+  failure = mxGetString(prhs[arg], dna, dna_len); // NULL at end of string
+  if (failure) { // mxGetString returns 0 if success, 1 on failure
+    mexPrintf("couldn't open dna");
+    return;
+  }
+  arg++; // arg -> 2
+  
+
+  /***************************************************************************/
+  // 2. est
+  /***************************************************************************/
+  int est_len = mxGetN(prhs[arg]) + 1;
+  char* est = (char*)mxCalloc(est_len, sizeof(char));
+  
+  failure = mxGetString(prhs[arg], est, est_len);
+  if (failure) {
+    mexPrintf("couldn't open est");
+    return;
+  }
+  arg++; // arg -> 3
+  
+
+  /***************************************************************************/
+  // 3. h (cell array)
+  /***************************************************************************/
+  penalty_struct* functions;
+  mwSize anz_func = 1;
+  
+  functions = read_penalty_struct_from_cell(prhs[arg], anz_func);
+  //std::cout << " bla " << functions->max_len << " \n";
+  //std::cout << "lookup_penalty(5) = " << lookup_penalty(functions, 5, NULL, 0)<<std::endl ;
+  arg++; // arg -> 4
+  
+
+  /***************************************************************************/
+  // 4. match matrix (columnwise)
+  /***************************************************************************/
+  int nr_entries = mxGetN(prhs[arg]) * mxGetM(prhs[arg]);
+  //mexPrintf("\nNumber of entries in matchmatrix: %d\n", nr_entries) ;
+  double *matchmatrix;
+  matchmatrix = mxGetPr(prhs[arg]);
+  //mexPrintf("matchmatrix\n") ;
+  //for (int i=0; i< nr_entries; i++) {
+  //  mexPrintf("entry %d: <%f>\n", i, matchmatrix[i]);
+  //}
+  arg++; // arg -> 5
+  
+
+  /***************************************************************************/
+  // 5. donor
+  /***************************************************************************/
+  int donor_length = mxGetN(prhs[arg]);
+  double *donor;
+  donor = mxGetPr(prhs[arg]);
+  //mexPrintf("Donor eingelesen: <%f>\n", donor[0]);
+  arg++; // arg -> 6
+  
+
+  /***************************************************************************/
+  // 6. acceptor
+  /***************************************************************************/
+  int acceptor_length = mxGetN(prhs[arg]);
+  double *acceptor;
+  acceptor = mxGetPr(prhs[arg]);
+  //mexPrintf("Acceptor eingelesen: <%f>\n", acceptor[0]);
+  arg++; // arg -> 7
+
+  
+  /***************************************************************************/
+  // 7.remove_duplicate_scores
+  /***************************************************************************/
+  bool remove_duplicate_scores = (bool)*mxGetPr(prhs[arg]);
+  arg++; // arg -> 8
+  
+
+  /***************************************************************************/
+  // 8. print_matrix
+  /***************************************************************************/
+  int print_matrix = (int)*mxGetPr(prhs[arg]);
+  arg++; // arg -> 9
+
+
+  /***************************************************************************/ 
+  // initialize alignment matrices and call fill_matrix()  
+  /***************************************************************************/
+  
+  //possible donor positions
+  int nr_donor_sites = 0 ;
+  for (int ii=0; ii<donor_length; ii++) {
+    if(isfinite(donor[ii])) {
+      nr_donor_sites++ ;
+    }
+  }
+  //mwSize donor_sites[nr_donor_sites] ;
+  mwSize* donor_sites = (mwSize*)mxCalloc(nr_donor_sites,sizeof(mwSize));
+  int donor_idx = 0 ;
+  for (int ii=0; ii<donor_length; ii++) {
+    if(isfinite(donor[ii])) {
+      donor_sites[donor_idx] = ii+1 ;
+      donor_idx++ ;
+    }  
+  }
+
+
+  //int max_score_positions[nr_paths*2]; //local alignment: where is the highest alignment score [i1,j1,i2,j2...]
+  mwSize* max_score_positions = (mwSize*)mxCalloc(nr_paths*2,sizeof(mwSize));
+
+  //Pre_score* matrices[nr_paths];
+  Pre_score** matrices = (Pre_score**)mxCalloc(nr_paths,sizeof(Pre_score*));
+  for (int z=0; z<nr_paths; z++) {
+      //matrices[z] = new Pre_score[dna_len * est_len];
+      //mexPrintf("size of one matrice: %d\n", est_len*dna_len*sizeof(Pre_score)) ;
+      matrices[z] = (Pre_score*)mxCalloc(dna_len*est_len,sizeof(Pre_score));
+  }
+  
+  fill_matrix(nr_paths, matrices, est_len, dna_len, est, dna, functions, matchmatrix, donor, acceptor, remove_duplicate_scores, nr_donor_sites, donor_sites, max_score_positions);
+  
+  //just some info
+  //for (int z=0; z<nr_paths; z++) {
+  //  mexPrintf("(%d, %d)\n", max_score_positions[2*z], max_score_positions[2*z +1]) ;
+  //}
+  /***************************************************************************/ 
+  // return arguments etc.
+  /***************************************************************************/
+  int result_length; //Eine Variable fuer alle Matrizen
+
+  //int splice_align[(dna_len-1)*nr_paths]; //SpliceAlign for DNA - 1 line per matrix
+  //int est_align[(est_len-1)*nr_paths]; //SpliceAlign for EST - 1 line per matrix
+  //int mmatrix_param[(mlen*mlen)*nr_paths];//Matrix fuer Scorematrixparameter - 1 Zeile pro Matrix
+  int* splice_align = (int*)mxCalloc((dna_len-1)*nr_paths,sizeof(int));
+  int* est_align = (int*)mxCalloc((est_len-1)*nr_paths,sizeof(int));
+  int* mmatrix_param = (int*)mxCalloc((mlen*mlen)*nr_paths,sizeof(int));
+  double alignmentscores[nr_paths]; //alignment score for each path/matrix
+  
+  
+  memset((char*)splice_align, -1, (dna_len-1)*nr_paths*sizeof(int)); // fills splice_align with zeros
+  memset((char*)est_align, -1, (est_len-1)*nr_paths*sizeof(int)); // fills est_align with zeros
+  memset((char*)mmatrix_param, 0, mlen*mlen*nr_paths*sizeof(int)); //fills mmatrix_param with zeros
+  memset(alignmentscores, -1, nr_paths*sizeof(double)); //fills alignmentscores with zeros
+  
+  // dnaest
+  plhs[nlhs-1] = mxCreateCellMatrix(2, nr_paths);
+
+
+  //mexPrintf("DEBUG: decoding %d paths\n",nr_paths);
+  for (int z=0; z<nr_paths; z++) {      
+    result_length = 0 ;
+    
+    int* s_align = splice_align + (dna_len-1)*z;  //pointer
+    int* e_align = est_align + (est_len-1)*z ;    //pointer
+    int* mparam  = mmatrix_param + (mlen*mlen)*z; //pointer
+    
+    bool no_more_path = result_align(matrices, z, est_len, dna_len, &result_length, est, dna, s_align, e_align, mparam, alignmentscores, max_score_positions);
+      
+    // dnaest
+    mxArray* mxdna = mxCreateDoubleMatrix(result_length, 1, mxREAL);
+    mxArray* mxest = mxCreateDoubleMatrix(result_length, 1, mxREAL);
+    double *DNA = mxGetPr(mxdna) ;
+    double *EST = mxGetPr(mxest) ;
+    mxSetCell(plhs[nlhs-1], z*2, mxdna) ;
+    mxSetCell(plhs[nlhs-1], z*2+1, mxest) ;
+    
+    //backtracking
+    mwSize i = max_score_positions[2*z] ; //i (est)
+    mwSize j = max_score_positions[2*z +1] ; //j (dna)
+    mwSize prev_i = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_i; 
+    mwSize prev_j = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_j;
+    mwSize prev_m_no = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_matrix_no;
+    
+    for (int ii=result_length; ii>0; ii--) {
+      if ((prev_i == (i-1)) && (prev_j == (j-1))) { // match or mismatch
+       DNA[ii-1] = check_char(dna[j-1]) ;
+       EST[ii-1] = check_char(est[i-1]) ;
+      }
+      else if ((prev_i == (i)) && (prev_j == (j-1))) {// gap on EST
+       DNA[ii-1] = check_char(dna[j-1]) ;
+       EST[ii-1] = 0 ; //gap
+      }
+      else if ((prev_i == (i-1)) && (prev_j == (j))) { // gap on DNA
+       DNA[ii-1] = 0 ; //gap
+       EST[ii-1] = check_char(est[i-1]) ;
+      }
+      else {// splice site
+       for (j; j > prev_j; j--) {
+         DNA[ii-1] = check_char(dna[j-1]) ;
+         EST[ii-1] = 6 ; //intron
+         ii-- ;
+       }
+       ii++ ; // last ii-- too much (because done in for-loop) 
+      }
+      
+      i = prev_i;
+      j = prev_j;    
+      prev_i = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_i; 
+      prev_j = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_j; 
+      prev_m_no = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_matrix_no;
+    }
+    
+  } //end of z
+  
+  
+
+  //mexPrintf("DEBUG:Returning arguments\n");
+  int index = 0 ;
+
+  //splice_align (mxArray):
+  const mwSize dims0[] = {(dna_len-1), nr_paths};
+  //const int dims0[] = {(dna_len-1), nr_paths};
+  plhs[index] = mxCreateNumericArray(2, dims0, mxINT32_CLASS, mxREAL);
+  mwSize* start0 = (mwSize*)(mxGetData(plhs[index]));
+  memcpy(start0, splice_align, mxGetM(plhs[index])*mxGetN(plhs[index])*mxGetElementSize(plhs[index]));
+  index++ ; // ->1
+  //est_align (mxArray):
+  const mwSize dims1[] = {(est_len-1), nr_paths};
+  //const int dims1[] = {(est_len-1), nr_paths};
+  plhs[index] = mxCreateNumericArray(2, dims1, mxINT32_CLASS, mxREAL);
+  mwSize* start1 = (mwSize*)(mxGetData(plhs[index]));
+  memcpy(start1, est_align, mxGetM(plhs[index])*mxGetN(plhs[index])*mxGetElementSize(plhs[index]));
+  index++ ; // ->2 
+
+  //weightmatch (mxArray)
+  const mwSize dims2[] = {mlen*mlen, nr_paths};
+  //const int dims2[] = {mlen*mlen, nr_paths};
+  plhs[index] = mxCreateNumericArray(2, dims2, mxINT32_CLASS, mxREAL);
+  mwSize* start2= (mwSize*)(mxGetData(plhs[index]));
+  memcpy(start2, mmatrix_param, mxGetM(plhs[index])*mxGetN(plhs[index])*mxGetElementSize(plhs[index]));
+  index++ ; // ->3
+  
+  //alignmentscore 
+  plhs[index] = mxCreateDoubleMatrix(1,nr_paths,mxREAL);
+  if (plhs[index] == NULL)
+  {
+    printf("%s : Out of memory on line %d\n", __FILE__, __LINE__); 
+    printf("Unable to create mxArray.\n");
+    exit(1);
+  }
+  double* start3 = (double*)(mxGetData(plhs[index]));
+  memcpy(start3, alignmentscores, mxGetM(plhs[index])*mxGetN(plhs[index])*mxGetElementSize(plhs[index]));
+  index++ ; // ->4
+
+  assert(nlhs==index+1) ;
+
+  //Free Memory
+  //mexPrintf("DEBUG:Free Memory\n");
+  mxFree(donor_sites);
+  mxFree(max_score_positions);
+  for (int i=nr_paths-1;i>=0; i--)
+  {
+      //delete[] matrices[i];
+      mxFree(matrices[i]);
+  }
+  mxFree(matrices);
+  mxFree(splice_align);
+  mxFree(est_align);
+  mxFree(mmatrix_param);
+  mxFree(dna);
+  mxFree(est);
+
+  return;    
+  
+}
+
diff --git a/QPalmaDP/qpalma_dp.h b/QPalmaDP/qpalma_dp.h
new file mode 100644 (file)
index 0000000..c019466
--- /dev/null
@@ -0,0 +1,83 @@
+#ifndef _QPALMA_DP_H_
+#define _QPALMA_DP_H_
+
+#include "penalty_info.h"
+
+struct ArrayElem { //24B
+  double score;
+  int prev_j;
+  int prev_i;
+  int prev_matrix_no;   
+  bool isSplice;
+} ;
+
+struct pre_score { //24B
+  double value; //8
+  int prev_i; //4
+  int prev_j; //4
+  int prev_matrix_no;//4
+  bool issplice;//4
+};
+
+typedef struct pre_score Pre_score;
+
+typedef struct Align_pair { //24B
+  double value ; 
+  bool issplice;
+  int previ,prevj ;
+  char est_char;
+  char dna_char;
+};
+
+
+void fill_matrix (int nr_paths, Pre_score* matrices[], int len_est, int len_dna, char* est, char* dna, penalty_struct* functions, double* matchmatrix, double* donor, double* acceptor, bool remove_duplicate_scores, int nr_donor_sites, int* donor_sites, int* max_score_positions);
+
+int check_char(char base);
+
+bool result_align (Pre_score* matrices[], int matrixnr, int i,  int j, int* result_length_ptr, char* est, char* dna, int* s_align, int* e_align, int* m_param, double* alignmentscores, int* max_score_positions );
+
+extern void print_align(Pre_score* matrix, int length_est,  int length_dna, Align_pair* vektor, int result_length, int print_matrix);
+
+
+/* 
+ * The function myalign calculates a new alignment given the scoring scheme
+ * Its input arguments are:
+ *
+ * num_path(id)               ->    an integer specifying which alignment has to be done
+ * dna                        ->    
+ * est                        ->
+ * h                          ->
+ * mmatrix                    -> 
+ * donor                      ->
+ * acceptor                   ->
+ * remove_duplicate_scores    -> a boolean
+ * print_matrix               -> a boolean
+ *
+ * [SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest] = myalign_local(...
+ */
+
+class Alignment {
+
+   private:
+      SpliceAlign
+      EstAlign;
+      WeightMatch;
+      TotalScores;
+
+   public:
+      Alignment();
+      ~Alignment();
+
+      void myalign(int nr_paths, dna, est, h, matchmatrix, double* donor, double* acceptor, bool remove_duplicate_scores, bool print_matrix);
+      getSpliceAlign()
+      getEstAlign();
+      getWeightMatch();
+      getTotalScores();
+      getDNAEST();
+       
+
+};
+
+
+#endif  // _QPALMA_DP_H_
+
diff --git a/QPalmaDP/result_align.cpp b/QPalmaDP/result_align.cpp
new file mode 100644 (file)
index 0000000..2a49f68
--- /dev/null
@@ -0,0 +1,156 @@
+#include "assert.h"
+#include "fill_matrix.h"
+
+using namespace std;
+
+bool result_align(Pre_score* matrices[], int z, int est_len, int dna_len, int* result_length_ptr, char* est, char* dna, int* s_align, int* e_align, int* mparam, double* alignmentscores, int* max_score_positions)
+//Gibt 1 zurueck, wenn kein weiterer Pfad existiert, sonst 0
+
+{  
+  const int mlen=6; // length of matchmatrix
+  int startend[4] ; // [i_start, j_start, i_end, j_end] ;
+  startend[3] = max_score_positions[2*z] ; //i (est)
+  startend[4] = max_score_positions[2*z +1] ; //j (dna)
+  //printf("in result_align: z=%d:(%d, %d)\n", z, startend[3], startend[4]) ;
+  int dnanum ; //using check_char: letter -> number
+  int estnum ; //using check_char: letter -> number
+  
+  int dna_pos ;
+  int est_pos ;
+  int splice_state ;
+  int est_state ;
+  for (dna_pos=dna_len-1; dna_pos>startend[4]; dna_pos--) {
+    s_align[dna_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
+  }
+  for (est_pos=est_len-1; est_pos>startend[3]; est_pos--) {
+    e_align[est_pos-1] = 4 ;
+  }
+  //splice_align: 0: exon, 1: donor, 2: acceptor, 3: intro,. 4: dangling end
+
+  /* starting point for backtracking */
+  int i = startend[3] ; 
+  int j = startend[4] ;
+  int act_m_no  = z ;
+  alignmentscores[z] = ((Pre_score*)matrices[z] + i*dna_len +j)->value;
+  //printf("alignment_score for z=%d: %3.8f\n", z, alignmentscores[z]) ;
+
+  int prev_i    = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_i; 
+  int prev_j    = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_j;
+  int prev_m_no = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_matrix_no;
+
+  if(!(isfinite(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value)))
+  {
+    cout << "Kein weiterer Pfad vorhanden\n";
+    return 1;
+  }
+
+  if(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value==0) {
+    cout << "Kein weiterer Pfad vorhanden\n";
+    return 1;
+  }
+
+  assert(dna_pos == j) ;
+  assert(est_pos == i) ;
+  splice_state = 0 ; //exon
+  est_state = 1 ; //[112222111 ... for exons of length 2,4,3 ...
+
+  //compute length of alignment (between max(m,n) and m+n)     
+  //local_alignment: backtracking to first Zero: look at actual value
+  while(!(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value)==0.0) {
+    
+    //just some info
+    //if ((z>0) && (!(z==prev_m_no))) {
+    //  printf("(%d,%d): next step in next matrix\n", i,j) ;
+    //}
+
+    if ((prev_i == (i-1)) && (prev_j == (j-1))) { // match or mismatch
+      (*result_length_ptr)= (*result_length_ptr) + 1;
+      
+      s_align[dna_pos-1] = splice_state; 
+      dna_pos-- ;
+      e_align[est_pos-1] = est_state ; //1 or 2, depended
+      est_pos-- ;
+      
+      dnanum = check_char(dna[j-1]) ; //-1 because index starts with 0
+      estnum = check_char(est[i-1]) ; //-1 because index starts with 0
+      mparam[mlen*dnanum +estnum] ++ ;
+    }
+    else if ((prev_i == (i)) && (prev_j == (j-1))) {// gap on EST
+      (*result_length_ptr)= (*result_length_ptr) + 1;
+      
+      s_align[dna_pos-1] = splice_state; 
+      dna_pos-- ;
+
+      dnanum = check_char(dna[j-1]) ; //-1 because index starts with 0
+      estnum = 0 ; //gap
+      mparam[mlen*dnanum +estnum] ++ ;
+    }
+    else if ((prev_i == (i-1)) && (prev_j == (j))) { // gap on DNA
+      (*result_length_ptr)= (*result_length_ptr) + 1;
+      
+      e_align[est_pos-1] = est_state ;  //1 or 2, depended
+      est_pos-- ;
+
+      dnanum = 0 ; //gap
+      estnum = check_char(est[i-1]) ; //-1 because index starts with 0
+      mparam[mlen*dnanum +estnum] ++ ;
+    }
+    else {// splice site
+      (*result_length_ptr) =  (*result_length_ptr) + (j - prev_j);
+      if (((Pre_score*)matrices[act_m_no] + i*dna_len + j)->issplice==0) {
+       printf("matrices[act_m_no](i,j): %3.15f\n",((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value) ;
+       printf("matrices[prev_m_no](i,j): %3.15f\n",((Pre_score*)matrices[prev_m_no] + i*dna_len +j)->value) ;
+       printf("issplice(act): %d\n",((Pre_score*)matrices[act_m_no] + i*dna_len + j)->issplice) ;
+       printf("issplice(prev): %d\n",((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->issplice) ;
+       printf("act_m_no: %d, prev_m_no: %d\n", act_m_no, prev_m_no) ;
+       printf("j: %d -> prev_j:%d\n", j, prev_j) ;
+       printf("i: %d -> prev_i:%d\n", i, prev_i) ;
+      }
+      if (est_state==1) { //was exon labeled "1"
+       est_state = 2 ; //new exon is labeled "2"
+      }
+      else {
+       est_state = 1 ; //last exon was labeled "2", new exon is labeled "1"
+      }
+      for (j; j > prev_j; j--) {
+       if (splice_state == 0) { //coming from exon
+         splice_state = 2 ; //first intron_state: acceptor
+       } else {
+         splice_state = 3 ; //intron
+       }
+       if (j-1 == prev_j) { //last intron_state: donor
+         splice_state = 1 ;//donor
+       }
+       s_align[dna_pos-1] = splice_state; 
+       dna_pos-- ;
+      }
+      splice_state = 0 ; //exon again
+      
+    }
+   
+    startend[1] = i;
+    startend[2] = j;
+    i = prev_i;
+    j = prev_j;
+    act_m_no = prev_m_no ;
+    
+    prev_i    = ((Pre_score*)matrices[act_m_no] + i*dna_len + j)->prev_i; 
+    prev_j    = ((Pre_score*)matrices[act_m_no] + i*dna_len + j)->prev_j; 
+    prev_m_no = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_matrix_no;
+  }
+  for (dna_pos; dna_pos>0; dna_pos--) {
+    s_align[dna_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
+  }
+  for (est_pos; est_pos>0; est_pos--) {
+    e_align[est_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
+  }
+
+  //printf("(%d, %d) --> (%d, %d)\n", startend[1], startend[2], startend[3], startend[4]) ;
+  //printf("result_length: %d\n", *result_length_ptr) ;
+
+
+  return 0;
+}
+
+
index 7e345a8..4f1759f 100644 (file)
Binary files a/python/.qpalma.py.swp and b/python/.qpalma.py.swp differ
index e4c421c..51c6e9f 100644 (file)
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
+from numpy import inf
 from numpy.matlib import zeros
 
 def calculateWeights(plf, scores):
@@ -22,42 +23,44 @@ def calculateWeights(plf, scores):
 
     y_i and y_i+1 parameters, so the fractions are saved in the weight vectors!
    """
-   currentWeight = zeros((1,30))
+
+   currentWeight = zeros((30,1))
    
    for k in range(len(scores)):
       value = scores[k]
-      Lower = sum(d.limits <= value)
-      Upper = Lower +1 ; # x-werte bleiben fest
+      
+      Lower = 0
+      Lower = len([elem for elem in plf.limits if elem <= value])
+      Upper = Lower+1 ; # x-werte bleiben fest
 
       if Lower == 0:
-         currentWeight[1] = currentWeight[1] + 1;
-      elif:
-         Lower == length[plf.limits]
-         currentWeight[30] = currentWeight[30] + 1;
+         currentWeight[0] = currentWeight[0] + 1;
+      elif Lower == len(plf.limits):
+         currentWeight[-1] = currentWeight[-1] + 1;
       else:
-         weightup  = (value - plf.limits(Lower)) / (plf.limits(Upper) - plf.limits(Lower)) ;
-         weightlow = (plf.limits(Upper) - value) / (plf.limits(Upper) - plf.limits(Lower)) ;
-         currentWeight(Upper) = currentWeight(Upper) + weightup;
-         currentWeight(Lower) = currentWeight(Lower) +  weightlow;
+         weightup  = (value - plf.limits[Lower]) / (plf.limits[Upper] - plf.limits[Lower]) ;
+         weightlow = (plf.limits[Upper] - value) / (plf.limits[Upper] - plf.limits[Lower]) ;
+         currentWeight[Upper] = currentWeight[Upper] + weightup;
+         currentWeight[Lower] = currentWeight[Lower] + weightlow;
 
    return currentWeight
 
 
 def computeSpliceWeights(d, a, h, SpliceAlign, don_supp, acc_supp):
-   assert(isempty(d.transform))
-   assert(isempty(a.transform))
-   assert(isempty(h.transform))
+   #assert(isempty(d.transform))
+   #assert(isempty(a.transform))
+   #assert(isempty(h.transform))
 
    ####################################################################################
    # 1. Donor: In don_supp stehen Werte der SVM., in SpliceAlign die Einsen
    ####################################################################################
 
-   # Lege Gewichtsvektor fuer 30 supporting points an:
-   weightDon = zeros((1,30))
-
    # Picke die Positionen raus, an denen eine Donorstelle ist
-   DonorScores = don_supp(find(SpliceAlign==1)) ;
-   assert(all(DonorScores~=-Inf)) ;
+   #DonorScores = don_supp(find(SpliceAlign==1)) ;
+   #assert(all(DonorScores~=-Inf)) ;
+
+   DonorScores = [elem for pos,elem in enumerate(don_supp) if SpliceAlign[pos] == 1]
+   assert not ( -inf in DonorScores )
 
    weightDon = calculateWeights(d,DonorScores)
 
@@ -65,25 +68,28 @@ def computeSpliceWeights(d, a, h, SpliceAlign, don_supp, acc_supp):
    # 2. Acceptor: In acc_supp stehen Werte der SVM., in SpliceAlign die Einsen
    ####################################################################################
 
-   AcceptorScores = acc_supp(find(SpliceAlign == 2)+1) ; 
-   assert(all(AcceptorScores~=-Inf)) ;
+   #AcceptorScores = acc_supp(find(SpliceAlign == 2)+1) ; 
+   #assert(all(AcceptorScores~=-Inf)) ;
 
    #Den Vektor Acceptorstellen durchgehen und die Gewichtsvektoren belasten:
-   weightAcc = calculateWeights(a,AcceptorScores)
+   AcceptorScores = [elem for pos,elem in enumerate(acc_supp) if SpliceAlign[pos] == 2]
+   #assert not ( -inf in AcceptorScores )
 
+   weightAcc = calculateWeights(a,AcceptorScores)
 
    ####################################################################################
    # 3. Intron length: SpliceAlign: Gaps zaehlen und auf Gapgewichte addieren
    ####################################################################################
 
-   intron_starts =  find(SpliceAlign==1) ;
-   intron_ends   =  find(SpliceAlign==2) ;
+   #intron_starts =  find(SpliceAlign==1) ;
+   #intron_ends   =  find(SpliceAlign==2) ;
 
-   %assert: introns do not overlap
-   assert(length(intron_starts) == length(intron_ends)) ;
-   assert(all(intron_starts < intron_ends)) ; 
-   assert(all(intron_ends(1:end-1) < intron_starts(2:end))) ; 
+   #%assert: introns do not overlap
+   #assert(length(intron_starts) == length(intron_ends)) ;
+   #assert(all(intron_starts < intron_ends)) ; 
+   #assert(all(intron_ends(1:end-1) < intron_starts(2:end))) ; 
 
-   weightIntron = calculateWeights(h,AcceptorScores)
+   #weightIntron = calculateWeights(h,AcceptorScores)
+   weightIntron = [1,2,3]
 
    return weightDon, weightAcc, weightIntron
index 71ab47a..e48073e 100644 (file)
@@ -10,6 +10,8 @@ import numpy.matlib
 from set_param_palma import *
 from computeSpliceAlign import *
 
+from computeSpliceWeights import *
+
 """
 A training method for the QPalma project
 
@@ -213,15 +215,32 @@ def run():
          don_supp = Donors[exampleId] 
          acc_supp = Acceptors[exampleId] 
 
-
          # Berechne die Parameter des wirklichen Alignments (but with untrained d,a,h ...)    
-         true_SpliceAlign, true_weightMatch = computeSpliceAlign(dna, exons)
+         trueSpliceAlign, trueWeightMatch = computeSpliceAlign(dna, exons)
 
+         trueWeightDon, trueWeightAcc, trueWeightIntron = computeSpliceWeights(d, a, h, trueSpliceAlign, don_supp, acc_supp)
          pdb.set_trace()
 
-         true_weightDon, true_weightAcc, true_weightIntron = computeSpliceWeights(d, a, h, true_SpliceAlign, don_supp, acc_supp)
-      
-      
+         #
+         # compute wrong alignments
+         #  
+
+         # Compute donor, acceptor with penalty_lookup_new
+         # returns two double lists
+         [donor, acceptor] = compute_donacc(don_supp, acc_supp, d, a) ;
+       
+         #myalign wants the acceptor site on the g of the ag
+         acceptor = [acceptor(2:end) -Inf] ;
+         
+         # call myalign    
+         [SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest] = ...
+           myalign_local(num_path(id), dna, est, {h}, mmatrix, donor, acceptor, ...
+                         remove_duplicate_scores, print_matrix);
+       
+          SpliceAlign = double(SpliceAlign') ; %column
+          weightMatch = double(weightMatch') ;
+
+
       iteration_nr += 1
       break
 
@@ -229,16 +248,7 @@ def run():
      for id = 1:N  
        %fprintf('%i\n', id) ;
        if (mod(id,50)==0), fprintf(1,'.'); end
-          
       
-       %some assertions concerning the splicesites
-       idx_don = find(don_supp~=-Inf) ;
-       assert(all(dna(idx_don)=='g')) ;
-       assert(all(dna(idx_don(idx_don<length(dna))+1)=='t' | dna(idx_don(idx_don<length(dna))+1)=='c')) ;
-       idx_acc = find(acc_supp~=-Inf) ;
-       assert(all(dna(idx_acc(idx_acc>3)-2)=='a')) ;
-       assert(all(dna(idx_acc(idx_acc>2)-1)=='g')) ;
-        
        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        % True Alignment and Comparison with wrong ones
        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -365,9 +375,7 @@ def run():
        end ;
      end       
      fprintf('\n') ;
-
-
-
+      #################################################################################
       const_added = solver.addConstraint(deltas, loss, pos, marginRescaling)
 
       objValue,w,self.slacks = solver.solve()
@@ -377,14 +385,8 @@ def run():
      # maxgap=max(gap) 
      [mean(xis>=1) mean(gap>=1) mean(xis>=1e-3) mean(gap>=1e-3)]
 
-     [h,d,a,mmatrix] = set_param_palma(param,ARGS.train_with_intronlengthinformation) ;
-
-     save(paramfilename, ...
-          'param', 'h', 'd', 'a', 'mmatrix', 'A', 'b') ;
-
       if max(gap)<=column_eps:
          break
-     
 
    assert(all(maxgap<=column_eps))
    assert(length(take_idx)==N)
@@ -395,7 +397,6 @@ if __name__ == '__main__':
    run()
 
    """
-
       def __init__(self):
          numAcceptorParams = 0
          numDonorParams = 0