+ renamed files
authorfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Fri, 25 Jan 2008 17:40:33 +0000 (17:40 +0000)
committerfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Fri, 25 Jan 2008 17:40:33 +0000 (17:40 +0000)
git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@7588 e1793c9e-67f9-0310-80fc-b846ff1f7b36

36 files changed:
QPalmaDP/Makefile [deleted file]
QPalmaDP/Mathmatics.cpp [deleted file]
QPalmaDP/Mathmatics.h [deleted file]
QPalmaDP/QPalmaDP.i [deleted file]
QPalmaDP/common.h [deleted file]
QPalmaDP/config.h [deleted file]
QPalmaDP/debug_tools.h [deleted file]
QPalmaDP/fill_matrix.cpp [deleted file]
QPalmaDP/fill_matrix.h [deleted file]
QPalmaDP/io.cpp [deleted file]
QPalmaDP/io.h [deleted file]
QPalmaDP/penalty_info.cpp [deleted file]
QPalmaDP/penalty_info.h [deleted file]
QPalmaDP/print_align.cpp [deleted file]
QPalmaDP/qpalma_dp.cpp [deleted file]
QPalmaDP/qpalma_dp.h [deleted file]
QPalmaDP/result_align.cpp [deleted file]
dyn_prog/Makefile [new file with mode: 0644]
dyn_prog/Mathmatics.cpp [new file with mode: 0644]
dyn_prog/Mathmatics.h [new file with mode: 0644]
dyn_prog/QPalmaDP.i [new file with mode: 0644]
dyn_prog/common.h [new file with mode: 0644]
dyn_prog/config.h [new file with mode: 0644]
dyn_prog/debug_tools.h [new file with mode: 0644]
dyn_prog/fill_matrix.cpp [new file with mode: 0644]
dyn_prog/fill_matrix.h [new file with mode: 0644]
dyn_prog/io.cpp [new file with mode: 0644]
dyn_prog/io.h [new file with mode: 0644]
dyn_prog/penalty_info.cpp [new file with mode: 0644]
dyn_prog/penalty_info.h [new file with mode: 0644]
dyn_prog/print_align.cpp [new file with mode: 0644]
dyn_prog/qpalma_dp.cpp [new file with mode: 0644]
dyn_prog/qpalma_dp.h [new file with mode: 0644]
dyn_prog/result_align.cpp [new file with mode: 0644]
scripts/qpalma.py [deleted file]
scripts/qpalma_main.py [new file with mode: 0644]

diff --git a/QPalmaDP/Makefile b/QPalmaDP/Makefile
deleted file mode 100644 (file)
index 0ccad31..0000000
+++ /dev/null
@@ -1,33 +0,0 @@
-SRCS= Mathmatics.cpp\
-               fill_matrix.cpp\
-               qpalma_dp.cpp\
-               result_align.cpp\
-               penalty_info.cpp\
-               print_align.cpp\
-               io.cpp
-
-HDRS= Mathmatics.h\
-               common.h\
-               config.h\
-               debug_tools.h\
-               fill_matrix.h\
-               io.h\
-               penalty_info.h\
-               qpalma_dp.h
-
-OBJS = $(SRCS:%.cpp=%.o)
-
-#CXXFLAGS=-O3 -fPIC
-#CXXFLAGS=-O3 -fPIC -pg -fprofile-arcs
-CXXFLAGS=-O3 -fPIC -ggdb
-
-IF=QPalmaDP
-
-all: $(OBJS) $(HDRS)
-       swig -c++ -python ${IF}.i
-       g++ $(CXXFLAGS) -I/usr/include/python2.5 -c ${IF}_wrap.cxx -o ${IF}_wrap.o
-       g++ $(CXXFLAGS) -shared -lpython2.5 $(OBJS) ${IF}_wrap.o -o _${IF}.so
-       python -c "import ${IF}"
-
-clean:
-       rm *.o *.so *.cxx ${IF}.py ${IF}.pyc
diff --git a/QPalmaDP/Mathmatics.cpp b/QPalmaDP/Mathmatics.cpp
deleted file mode 100644 (file)
index 61298da..0000000
+++ /dev/null
@@ -1,349 +0,0 @@
-// Math.cpp: implementation of the CMath class.
-//
-//////////////////////////////////////////////////////////////////////
-
-
-#include "Mathmatics.h"
-#include "io.h"
-
-#include <sys/time.h>
-#include <sys/types.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-#include <unistd.h>
-#include <assert.h>
-
-//////////////////////////////////////////////////////////////////////
-// Construction/Destruction
-//////////////////////////////////////////////////////////////////////
-
-#ifdef USE_LOGCACHE
-//gene/math specific constants
-#ifdef USE_HMMDEBUG
-#define MAX_LOG_TABLE_SIZE 10*1024*1024
-#define LOG_TABLE_PRECISION 1e-6
-#else
-#define MAX_LOG_TABLE_SIZE 123*1024*1024
-#define LOG_TABLE_PRECISION 1e-15
-#endif
-
-INT CMath::LOGACCURACY         = 0; // 100000 steps per integer
-#endif
-
-INT CMath::LOGRANGE            = 0; // range for logtable: log(1+exp(x))  -25 <= x <= 0
-
-#ifdef USE_PATHDEBUG
-const REAL CMath::INFTY            =  1e11;            // infinity
-#else
-const REAL CMath::INFTY            =  -log(0.0);       // infinity
-#endif
-const REAL CMath::ALMOST_NEG_INFTY =  -1000;   
-
-CHAR CMath::rand_state[256];
-
-CMath::CMath()
-{
-       struct timeval tv;
-       gettimeofday(&tv, NULL);
-       UINT seed=(UINT) (4223517*getpid()*tv.tv_sec*tv.tv_usec);
-       initstate(seed, CMath::rand_state, sizeof(CMath::rand_state));
-       CIO::message(M_INFO, "seeding random number generator with %u\n", seed);
-
-#ifdef USE_LOGCACHE
-    LOGRANGE=CMath::determine_logrange();
-    LOGACCURACY=CMath::determine_logaccuracy(LOGRANGE);
-    CIO::message(M_INFO, "Initializing log-table (size=%i*%i*%i=%2.1fMB) ...",LOGRANGE,LOGACCURACY,sizeof(REAL),LOGRANGE*LOGACCURACY*sizeof(REAL)/(1024.0*1024.0)) ;
-   
-    CMath::logtable=new REAL[LOGRANGE*LOGACCURACY];
-    init_log_table();
-    CIO::message(M_INFO, "Done.\n") ;
-#else
-       INT i=0;
-       while ((REAL)log(1+((REAL)exp(-REAL(i)))))
-               i++;
-    CIO::message(M_INFO, "determined range for x in log(1+exp(-x)) is:%d\n", i);
-       LOGRANGE=i;
-#endif 
-}
-
-CMath::~CMath()
-{
-#ifdef USE_LOGCACHE
-       delete[] logtable;
-#endif
-}
-
-#ifdef USE_LOGCACHE
-INT CMath::determine_logrange()
-{
-    INT i;
-    REAL acc=0;
-    for (i=0; i<50; i++)
-    {
-       acc=((REAL)log(1+((REAL)exp(-REAL(i)))));
-       if (acc<=(REAL)LOG_TABLE_PRECISION)
-           break;
-    }
-
-    CIO::message(M_INFO, "determined range for x in table log(1+exp(-x)) is:%d (error:%G)\n",i,acc);
-    return i;
-}
-
-INT CMath::determine_logaccuracy(INT range)
-{
-    range=MAX_LOG_TABLE_SIZE/range/((int)sizeof(REAL));
-    CIO::message(M_INFO, "determined accuracy for x in table log(1+exp(-x)) is:%d (error:%G)\n",range,1.0/(double) range);
-    return range;
-}
-
-//init log table of form log(1+exp(x))
-void CMath::init_log_table()
-{
-  for (INT i=0; i< LOGACCURACY*LOGRANGE; i++)
-    logtable[i]=log(1+exp(REAL(-i)/REAL(LOGACCURACY)));
-}
-#endif
-
-void CMath::sort(INT *a, INT cols, INT sort_col)
-{
-  INT changed=1;
-  if (a[0]==-1) return ;
-  while (changed)
-  {
-      changed=0; INT i=0 ;
-      while ((a[(i+1)*cols]!=-1) && (a[(i+1)*cols+1]!=-1)) // to be sure
-         {
-                 if (a[i*cols+sort_col]>a[(i+1)*cols+sort_col])
-                 {
-                         for (INT j=0; j<cols; j++)
-                                 CMath::swap(a[i*cols+j],a[(i+1)*cols+j]) ;
-                         changed=1 ;
-                 } ;
-                 i++ ;
-         } ;
-  } ;
-} 
-
-void CMath::sort(REAL *a, INT* idx, INT N) 
-{
-
-       INT changed=1;
-       while (changed)
-       {
-               changed=0;
-               for (INT i=0; i<N-1; i++)
-               {
-                       if (a[i]>a[i+1])
-                       {
-                               swap(a[i],a[i+1]) ;
-                               swap(idx[i],idx[i+1]) ;
-                               changed=1 ;
-                       } ;
-               } ;
-       } ;
-        
-} 
-
-
-
-//plot x- axis false positives (fp) 1-Specificity
-//plot y- axis true positives (tp) Sensitivity
-INT CMath::calcroc(REAL* fp, REAL* tp, REAL* output, INT* label, INT& size, INT& possize, INT& negsize, REAL& tresh, FILE* rocfile)
-{
-       INT left=0;
-       INT right=size-1;
-       INT i;
-
-       for (i=0; i<size; i++)
-       {
-               if (!(label[i]== -1 || label[i]==1))
-                       return -1;
-       }
-
-       //sort data such that -1 labels first +1 behind
-       while (left<right)
-       {
-               while ((label[left] < 0) && (left<right))
-                       left++;
-               while ((label[right] > 0) && (left<right))      //warning: label must be either -1 or +1
-                       right--;
-
-               swap(output[left],output[right]);
-               swap(label[left], label[right]);
-       }
-
-       // neg/pos sizes
-       negsize=left;
-       possize=size-left;
-       REAL* negout=output;
-       REAL* posout=&output[left];
-
-       // sort the pos and neg outputs separately
-       qsort(negout, negsize);
-       qsort(posout, possize);
-
-       // set minimum+maximum values for decision-treshhold
-       REAL minimum=min(negout[0], posout[0]);
-       REAL maximum=minimum;
-       if (negsize>0)
-               maximum=max(maximum,negout[negsize-1]);
-       if (possize>0)
-               maximum=max(maximum,posout[possize-1]);
-
-       REAL treshhold=minimum;
-       REAL old_treshhold=treshhold;
-
-       //clear array.
-       for (i=0; i<size; i++)
-       {
-               fp[i]=1.0;
-               tp[i]=1.0;
-       }
-
-       //start with fp=1.0 tp=1.0 which is posidx=0, negidx=0
-       //everything right of {pos,neg}idx is considered to beLONG to +1
-       INT posidx=0;
-       INT negidx=0;
-       INT iteration=1;
-       INT returnidx=-1;
-
-       REAL minerr=10;
-
-       while (iteration < size && treshhold<=maximum)
-       {
-               old_treshhold=treshhold;
-
-               while (treshhold==old_treshhold && treshhold<=maximum)
-               {
-                       if (posidx<possize && negidx<negsize)
-                       {
-                               if (posout[posidx]<negout[negidx])
-                               {
-                                       if (posout[posidx]==treshhold)
-                                               posidx++;
-                                       else
-                                               treshhold=posout[posidx];
-                               }
-                               else
-                               {
-                                       if (negout[negidx]==treshhold)
-                                               negidx++;
-                                       else
-                                               treshhold=negout[negidx];
-                               }
-                       }
-                       else
-                       {
-                               if (posidx>=possize && negidx<negsize-1)
-                               {
-                                       negidx++;
-                                       treshhold=negout[negidx];
-                               }
-                               else if (negidx>=negsize && posidx<possize-1)
-                               {
-                                       posidx++;
-                                       treshhold=posout[posidx];
-                               }
-                               else if (negidx<negsize && treshhold!=negout[negidx])
-                                       treshhold=negout[negidx];
-                               else if (posidx<possize && treshhold!=posout[posidx])
-                                       treshhold=posout[posidx];
-                               else
-                               {
-                                       treshhold=2*(maximum+100); // force termination
-                                       posidx=possize;
-                                       negidx=negsize;
-                                       break;
-                               }
-                       }
-               }
-
-               //calc tp,fp
-               tp[iteration]=(possize-posidx)/(REAL (possize));
-               fp[iteration]=(negsize-negidx)/(REAL (negsize));
-
-               //choose poINT with minimal error
-               if (minerr > negsize*fp[iteration]/size+(1-tp[iteration])*possize/size )
-               {
-                       minerr=negsize*fp[iteration]/size+(1-tp[iteration])*possize/size;
-                       tresh=(old_treshhold+treshhold)/2;
-                       returnidx=iteration;
-               }
-
-               iteration++;
-       }
-
-       // set new size
-       size=iteration;
-
-       if (rocfile)
-       {
-               const CHAR id[]="ROC";
-               fwrite(id, sizeof(char), sizeof(id), rocfile);
-               fwrite(fp, sizeof(REAL), size, rocfile);
-               fwrite(tp, sizeof(REAL), size, rocfile);
-       }
-
-       return returnidx;
-}
-
-UINT CMath::crc32(BYTE *data, INT len)
-{
-    UINT        result;
-    INT                 i,j;
-    BYTE       octet;
-
-    result = 0-1;
-
-    for (i=0; i<len; i++)
-    {
-       octet = *(data++);
-       for (j=0; j<8; j++)
-       {
-           if ((octet >> 7) ^ (result >> 31))
-           {
-               result = (result << 1) ^ 0x04c11db7;
-           }
-           else
-           {
-               result = (result << 1);
-           }
-           octet <<= 1;
-       }
-    }
-
-    return ~result; 
-}
-
-double CMath::mutual_info(REAL* p1, REAL* p2, INT len)
-{
-       double e=0;
-
-       for (INT i=0; i<len; i++)
-               for (INT j=0; j<len; j++)
-                       e+=exp(p2[j*len+i])*(p2[j*len+i]-p1[i]-p1[j]);
-
-       return e;
-}
-
-double CMath::relative_entropy(REAL* p, REAL* q, INT len)
-{
-       double e=0;
-
-       for (INT i=0; i<len; i++)
-               e+=exp(p[i])*(p[i]-q[i]);
-
-       return e;
-}
-
-double CMath::entropy(REAL* p, INT len)
-{
-       double e=0;
-
-       for (INT i=0; i<len; i++)
-               e-=exp(p[i])*p[i];
-
-       return e;
-}
diff --git a/QPalmaDP/Mathmatics.h b/QPalmaDP/Mathmatics.h
deleted file mode 100644 (file)
index 2e24d48..0000000
+++ /dev/null
@@ -1,546 +0,0 @@
-#ifndef __MATHMATICS_H_
-#define __MATHMATICS_H_
-
-#include "common.h"
-#include "io.h"
-
-#include <math.h>
-#include <stdio.h>
-
-//define finite for win32
-#ifdef _WIN32
-#include <float.h>
-#define finite _finite
-#define isnan _isnan
-#endif
-
-#ifndef NAN
-#include <stdlib.h>
-#define NAN (strtod("NAN",NULL))
-#endif
-
-#ifdef SUNOS
-extern "C" int finite(double);
-#endif
-
-/** Mathematical Functions.
- * Class which collects generic mathematical functions
- */
-class CMath  
-{
-public:
-       /**@name Constructor/Destructor.
-        */
-       //@{
-       ///Constructor - initializes log-table
-       CMath();
-
-       ///Destructor - frees logtable
-       virtual ~CMath();
-       //@}
-
-       /**@name min/max/abs functions.
-       */
-       //@{
-
-       ///return the minimum of two integers
-       //
-       template <class T>
-       static inline T min(T a, T b)
-       {
-               return ((a)<=(b))?(a):(b);
-       }
-
-       ///return the maximum of two integers
-       template <class T>
-       static inline T max(T a, T b) 
-       {
-               return ((a)>=(b))?(a):(b);
-       }
-
-       ///return the maximum of two integers
-       template <class T>
-       static inline T abs(T a)
-       {
-               return ((a)>=(0))?(a):(-a);
-       }
-       //@}
-
-       /**@name misc functions */
-       //@{
-
-       /// crc32
-       static UINT crc32(BYTE *data, INT len);
-
-       static inline REAL round(REAL d)
-       {
-               return floor(d+0.5);
-       }
-
-       /// signum of type T variable a 
-       template <class T>
-       static inline REAL sign(REAL a)
-       {
-               if (a==0)
-                       return 0;
-               else return (a<0) ? (-1) : (+1);
-       }
-
-       /// swap floats a and b
-       template <class T>
-       static inline void swap(T & a,T &b)
-       {
-               T c=a ;
-               a=b; b=c ;
-       }
-
-       /// x^2
-       template <class T>
-       static inline T sq(T x)
-       {
-               return x*x;
-       }
-
-       static inline LONG factorial(INT n)
-       {
-               LONG res=1 ;
-               for (int i=2; i<=n; i++)
-                       res*=i ;
-               return res ;
-       }
-
-       static inline LONG nchoosek(INT n, INT k)
-       {
-               long res=1 ;
-
-               for (INT i=n-k+1; i<=n; i++)
-                       res*=i ;
-
-               return res/factorial(k) ;
-       }
-
-       /** performs a bubblesort on a given matrix a.
-        * it is sorted from in ascending order from top to bottom
-        * and left to right */
-       static void sort(INT *a, INT cols, INT sort_col=0) ;
-       static void sort(REAL *a, INT*idx, INT N) ;
-       
-       /** performs a quicksort on an array output of length size
-        * it is sorted from in ascending (for type T) */
-       template <class T>
-       static void qsort(T* output, INT size) ;
-
-       /** performs a quicksort on an array output of length size
-        * it is sorted from in ascending order 
-        * (for type T1) and returns the index (type T2)
-        * matlab alike [sorted,index]=sort(output) 
-        */
-       template <class T1,class T2>
-       static void qsort(T1* output, T2* index, INT size);
-
-       /** performs a quicksort on an array output of length size
-        * it is sorted from in ascending order
-        * (for type T1) and returns the index (type T2)
-        * matlab alike [sorted,index]=sort(output) 
-        */
-       template <class T1,class T2>
-       static void qsort_backward(T1* output, T2* index, INT size);
-
-     /* finds the smallest element in output and puts that element as the 
-               first element  */
-       template <class T>
-       static void min(REAL* output, T* index, INT size);
-
-     /* finds the n smallest elements in output and puts these elements as the 
-               first n elements  */
-       template <class T>
-       static void nmin(REAL* output, T* index, INT size, INT n);
-
-       /* performs a inplace unique of a vector of type T using quicksort 
-        * returns the new number of elements */
-       template <class T>
-       static INT unique(T* output, INT size) 
-       {
-               qsort(output, size) ;
-               INT i,j=0 ;
-               for (i=0; i<size; i++)
-                       if (i==0 || output[i]!=output[i-1])
-                               output[j++]=output[i] ;
-               return j ;
-       }
-
-       /* finds an element in a sorted array via binary search
-     * returns -1 if not found */
-       static inline INT fast_find(WORD* output, INT size, WORD elem)
-       {
-               INT start=0, end=size-1, middle=size/2 ;
-
-               if (output[start]>elem || output[end]<elem)
-                       return -1 ;
-
-               while (1)
-               {
-                       if (output[middle]>elem)
-                       {
-                               end = middle ;
-                               middle=start+(end-start)/2 ;
-                       } ;
-                       if (output[middle]<elem)
-                       {
-                               start = middle ;
-                               middle=start+(end-start)/2 ;
-                       }
-                       if (output[middle]==elem)
-                               return middle ;
-                       if (end-start<=1)
-                       {
-                               if (output[start]==elem)
-                                       return start ;
-                               if (output[end]==elem)
-                                       return end ;
-                               return -1 ;
-                       }
-               }
-       }
-
-       /* finds an element in a sorted array via binary search
-        * that is smaller-equal elem und the next element is larger-equal  */
-       static inline INT fast_find_range(REAL* output, INT size, REAL elem)
-       {
-               INT start=0, end=size-2, middle=(size-2)/2 ;
-
-               if (output[start]>elem)
-                       return -1 ;
-               if (output[end]<=elem)
-                       return size-1 ;
-
-               while (1)
-               {
-                       if (output[middle]>elem)
-                       {
-                               end = middle ;
-                               middle=start+(end-start)/2 ;
-                       } ;
-                       if (output[middle]<elem)
-                       {
-                               start = middle ;
-                               middle=start+(end-start)/2 ;
-                       }
-                       if (output[middle]<=elem && output[middle+1]>=elem)
-                               return middle ;
-                       if (end-start<=1)
-                       {
-                               if (output[start]<=elem && output[start+1]>=elem)
-                                       return start ;
-                               return end ;
-                       }
-               }
-       }
-
-       /** calculates ROC into (fp,tp)
-        * from output and label of length size 
-        * returns index with smallest error=fp+fn
-        */
-       static INT calcroc(REAL* fp, REAL* tp, REAL* output, INT* label, INT& size, INT& possize, INT& negsize, REAL& tresh, FILE* rocfile);
-       //@}
-
-       /// returns the mutual information of p which is given in logspace
-       /// where p,q are given in logspace
-       static double mutual_info(REAL* p1, REAL* p2, INT len);
-
-       /// returns the relative entropy H(P||Q), 
-       /// where p,q are given in logspace
-       static double relative_entropy(REAL* p, REAL* q, INT len);
-
-       /// returns entropy of p which is given in logspace
-       static double entropy(REAL* p, INT len);
-
-       /**@name summing functions */
-       //@{ 
-       /** sum logarithmic probabilities.
-        * Probability measures are summed up but are now given in logspace where
-        * direct summation of exp(operand) is not possible due to numerical problems, i.e. eg. exp(-1000)=0. Therefore
-        * we do log( exp(a) + exp(b)) = a + log (1 + exp (b-a)) where a is max(p,q) and b min(p,q). */
-#ifdef USE_LOGCACHE
-       static inline REAL logarithmic_sum(REAL p, REAL q)
-       {
-               if (finite(p))
-               {
-                       if (finite(q))
-                       {
-
-                               register REAL diff=p-q;
-
-                               if (diff>0)             //p>q
-                               {
-                                       if (diff > LOGRANGE)
-                                               return p;
-                                       else
-                                               return  p + logtable[(int)(diff*LOGACCURACY)];
-                               }
-                               else                    //p<=q
-                               {
-                                       if (-diff > LOGRANGE)
-                                               return  q;
-                                       else
-                                               return  q + logtable[(int)(-diff*LOGACCURACY)];
-                               }
-                       }
-                       CIO::message("INVALID second operand to logsum(%f,%f) expect undefined results\n", p, q);
-                       return NAN;
-               }
-               else 
-                       return q;
-       }
-
-       ///init log table of form log(1+exp(x))
-       static void init_log_table();
-       
-       /// determine INT x for that log(1+exp(-x)) == 0
-       static INT determine_logrange();
-
-       /// determine accuracy, such that the thing fits into MAX_LOG_TABLE_SIZE, needs logrange as argument
-       static INT determine_logaccuracy(INT range);
-#else
-       /*
-       inline REAL logarithmic_sum(REAL p, REAL q)
-       {
-           double result=comp_logarithmic_sum(p,q);
-
-           printf("diff:%f <-> %f\n",p-q, result);
-           return result;
-       }*/
-
-       static inline REAL logarithmic_sum(REAL p, REAL q)
-       {
-               if (finite(p))
-               {
-                       if (finite(q))
-                       {
-
-                               register REAL diff=p-q;
-                               if (diff>0)             //p>q
-                               {
-                                       if (diff > LOGRANGE)
-                                               return p;
-                                       else
-                                               return  p + log(1+exp(-diff));
-                               }
-                               else                    //p<=q
-                               {
-                                       if (-diff > LOGRANGE)
-                                               return  q;
-                                       else
-                                               return  q + log(1+exp(diff));
-                               }
-                       }
-                       return p;
-               }
-               else 
-                       return q;
-       }
-#endif
-#ifdef LOG_SUM_ARRAY
-       /** sum up a whole array of values in logspace.
-        * This function addresses the numeric instabiliy caused by simply summing up N elements by adding 
-        * each of the elements to some variable. Instead array neighbours are summed up until one element remains.
-        * Whilst the number of additions remains the same, the error is only in the order of log(N) instead N.
-        */
-       static inline REAL logarithmic_sum_array(REAL *p, INT len)
-         {
-           if (len<=2)
-             {
-               if (len==2)
-                 return logarithmic_sum(p[0],p[1]) ;
-               if (len==1)
-                 return p[0];
-               return -INFTY ;
-             }
-           else
-             {
-               register REAL *pp=p ;
-               if (len%2==1) pp++ ;
-               for (register INT j=0; j < len>>1; j++)
-                 pp[j]=logarithmic_sum(pp[j<<1], pp[1+(j<<1)]) ;
-             }
-           return logarithmic_sum_array(p,len%2+len>>1) ;
-         } 
-#endif
-       //@}
-public:
-       /**@name constants*/
-       //@{
-       /// infinity
-       static const REAL INFTY;
-       
-       /// almost neg (log) infinity
-       static const REAL ALMOST_NEG_INFTY;
-
-       /// range for logtable: log(1+exp(x))  -LOGRANGE <= x <= 0
-       static INT LOGRANGE;
-       
-#ifdef USE_LOGCACHE    
-       /// number of steps per integer
-       static INT LOGACCURACY;
-       //@}
-protected:
-       ///table with log-values
-       static REAL* logtable;  
-#endif
-       static CHAR rand_state[256];
-};
-
-
-//implementations of template functions
-template <class T>
-void CMath::qsort(T* output, INT size)
-{
-       if (size==2)
-       {
-               if (output[0] > output [1])
-                       swap(output[0],output[1]);
-       }
-       else
-       {
-               REAL split=output[(size*rand())/(RAND_MAX+1)];
-               //REAL split=output[size/2];
-
-               INT left=0;
-               INT right=size-1;
-
-               while (left<=right)
-               {
-                       while (output[left] < split)
-                               left++;
-                       while (output[right] > split)
-                               right--;
-
-                       if (left<=right)
-                       {
-                               swap(output[left],output[right]);
-                               left++;
-                               right--;
-                       }
-               }
-
-               if (right+1> 1)
-                       qsort(output,right+1);
-
-               if (size-left> 1)
-                       qsort(&output[left],size-left);
-       }
-}
-
-template <class T1,class T2>
-void CMath::qsort(T1* output, T2* index, INT size)
-{
-       if (size==2)
-       {
-               if (output[0] > output [1]){
-                       swap(output[0],output[1]);
-                       swap(index[0],index[1]);
-               }
-               
-       }
-       else
-       {
-               REAL split=output[(size*rand())/(RAND_MAX+1)];
-               //REAL split=output[size/2];
-               
-               INT left=0;
-               INT right=size-1;
-               
-               while (left<=right)
-               {
-                       while (output[left] < split)
-                               left++;
-                       while (output[right] > split)
-                               right--;
-                       
-                       if (left<=right)
-                       {
-                               swap(output[left],output[right]);
-                               swap(index[left],index[right]);
-                               left++;
-                               right--;
-                       }
-               }
-               
-               if (right+1> 1)
-                       qsort(output,index,right+1);
-               
-               if (size-left> 1)
-                       qsort(&output[left],&index[left], size-left);
-       }
-}
-
-template <class T1,class T2>
-void CMath::qsort_backward(T1* output, T2* index, INT size)
-{
-       if (size==2)
-       {
-               if (output[0] < output [1]){
-                       swap(output[0],output[1]);
-                       swap(index[0],index[1]);
-               }
-               
-       }
-       else
-       {
-               REAL split=output[(size*rand())/(RAND_MAX+1)];
-               //REAL split=output[size/2];
-               
-               INT left=0;
-               INT right=size-1;
-               
-               while (left<=right)
-               {
-                       while (output[left] > split)
-                               left++;
-                       while (output[right] < split)
-                               right--;
-                       
-                       if (left<=right)
-                       {
-                               swap(output[left],output[right]);
-                               swap(index[left],index[right]);
-                               left++;
-                               right--;
-                       }
-               }
-               
-               if (right+1> 1)
-                       qsort(output,index,right+1);
-               
-               if (size-left> 1)
-                       qsort(&output[left],&index[left], size-left);
-       }
-}
-
-template <class T> 
-void CMath::nmin(REAL* output, T* index, INT size, INT n)
-{
-       if (6*n*size<13*size*log((double)size))
-               for (INT i=0; i<n; i++)
-                       min(&output[i], &index[i], size-i) ;
-       else
-               qsort(output, index, size) ;
-}
-
-template <class T>
-void CMath::min(REAL* output, T* index, INT size)
-{
-       if (size<=0)
-               return ;
-       REAL min_elem = output[0] ;
-       INT min_index = 0 ;
-       for (INT i=1; i<size; i++)
-               if (output[i]<min_elem)
-               {
-                       min_index=i ;
-                       min_elem=output[i] ;
-               }
-       swap(output[0], output[min_index]) ;
-       swap(index[0], index[min_index]) ;
-}
-#endif
diff --git a/QPalmaDP/QPalmaDP.i b/QPalmaDP/QPalmaDP.i
deleted file mode 100644 (file)
index 66ed219..0000000
+++ /dev/null
@@ -1,68 +0,0 @@
-%module QPalmaDP
-%{
-#define SWIG_FILE_WITH_INIT
-#include "common.h"
-#include "Mathmatics.h"
-#include "penalty_info.h"
-#include "qpalma_dp.h"
-%}
-
-%include "std_vector.i"
-%include "std_string.i"
-
-%include "carrays.i"
-
-%include "common.h"
-%include "Mathmatics.h"
-%include "penalty_info.h"
-%include "qpalma_dp.h"
-
-%array_class(int, intArray) ;
-%array_class(double, doubleArray) ;
-%array_functions(double, doubleFArray) ;
-%array_functions(struct penalty_struct, penaltyArray) ;
-
-/*
-%array_functions(int, intArray) ;
-%array_functions(double, doubleArray) ;
-*/
-%array_class(Pre_score, Pre_scoreArray) ;
-
-%pythoncode
-%{
-
-def createPenaltyArrayFromList(list):
-   array = new_penaltyArray(len(list))
-   for i in range(len(list)):
-      penaltyArray_setitem(array, i, list[i])
-   return array
-
-def createDoubleArrayFromList(list):
-   #array = new_doubleArray(len(list))
-   array = doubleArray(len(list))
-   for i in range(len(list)):
-   #   doubleArray_setitem(array, i, list[i])
-      array[i] = list[i]
-   return array
-
-def createIntArrayFromList(list):
-   #array = new_intArray(len(list))
-   array = intArray(len(list))
-   for i in range(len(list)):
-      #intArray_setitem(array, i, list[i])
-      array[i] = list[i]
-   return array
-
-def createListFromIntArray(array, array_len):
-   list = [0]*array_len
-   for i in range(array_len):
-      list[i] = intArray_getitem(array,i)
-   return list
-
-def createListFromDoubleArray(array, array_len):
-   list = [0]*array_len
-   for i in range(array_len):
-      list[i] = doubleArray_getitem(array,i)
-   return list
-
-%} 
diff --git a/QPalmaDP/common.h b/QPalmaDP/common.h
deleted file mode 100644 (file)
index 61776e9..0000000
+++ /dev/null
@@ -1,160 +0,0 @@
-#ifndef __COMMON_H__
-#define __COMMON_H__
-
-#include <stdlib.h> 
-#include <stdio.h> 
-#include "config.h"
-
-#ifdef SUNOS
-#define bool int
-#define false 0
-#define true 1
-#endif
-
-#ifndef LINUX
-#define RANDOM_MAX 2147483647
-#else
-#define RANDOM_MAX RAND_MAX
-#endif
-
-/**@name Standard Types 
- * Definition of Platform independent Types
-*/
-//@{
-
-/// Type CHAR
-typedef char CHAR;
-typedef CHAR* P_CHAR;
-
-/// Type BYTE 
-typedef unsigned char BYTE;
-typedef BYTE* P_BYTE;
-
-/// Type SHORT is 2 bytes in size
-typedef short int SHORT;
-typedef SHORT* P_SHORT;
-
-/// Type WORD is 2 bytes in size
-typedef unsigned short int WORD;
-typedef WORD* P_WORD;
-
-/// Type INT is 4 bytes in size
-typedef int INT;
-typedef INT* P_INT;
-
-/// Type INT is 4 bytes in size
-typedef unsigned int UINT;
-typedef UINT* P_UINT;
-
-/// Type LONG is 8 bytes in size
-typedef long LONG;
-typedef LONG* P_LONG;
-
-/// Type SHORTREAL is 4 bytes in size
-typedef float SHORTREAL;
-typedef SHORTREAL* P_SHORTREAL;
-
-/// Type REAL is 8 bytes in size
-typedef double REAL;
-typedef REAL* P_REAL;
-
-/// Type LONGREAL is 16 bytes in size
-//typedef long double LONGREAL;
-//typedef LONGREAL* P_LONGREAL;
-
-#ifdef USE_SHORTREAL_KERNELCACHE
-       typedef SHORTREAL KERNELCACHE_ELEM;
-#else
-       typedef REAL KERNELCACHE_ELEM;
-#endif
-
-typedef KERNELCACHE_ELEM P_KERNELCACHE_ELEM;
-
-typedef LONG KERNELCACHE_IDX;
-
-/// The io libs output [DEBUG] etc in front of every CIO::message
-/// 'higher' messages filter output depending on the loglevel, i.e. CRITICAL messages
-/// will print all M_CRITICAL TO M_EMERGENCY messages to
-enum EMessageType
-{
-       M_DEBUG,
-       M_INFO,
-       M_NOTICE,
-       M_WARN,
-       M_ERROR,
-       M_CRITICAL,
-       M_ALERT,
-       M_EMERGENCY,
-       M_MESSAGEONLY,
-       M_PROGRESS
-};
-
-enum EKernelType
-{
-       K_UNKNOWN = 0,
-       K_OPTIMIZABLE = 4096,
-       K_LINEAR = 10 | K_OPTIMIZABLE,
-       K_POLY  = 20,
-       K_GAUSSIAN = 30,
-       K_HISTOGRAM = 40,
-       K_SALZBERG = 41,
-       K_LOCALITYIMPROVED = 50,
-       K_SIMPLELOCALITYIMPROVED = 60,
-       K_FIXEDDEGREE = 70,
-       K_WEIGHTEDDEGREE =    80 | K_OPTIMIZABLE,
-       K_WEIGHTEDDEGREEPOS = 81 | K_OPTIMIZABLE,
-       K_WEIGHTEDDEGREEPOLYA =    82,
-       K_WD = 83,
-       K_COMMWORD = 90 | K_OPTIMIZABLE ,
-       K_POLYMATCH = 100,
-       K_ALIGNMENT = 110,
-       K_COMMWORDSTRING = 120 | K_OPTIMIZABLE,
-       K_SPARSENORMSQUARED = 130,
-       K_COMBINED = 140 | K_OPTIMIZABLE
-};
-
-enum EFeatureType
-{
-       F_UNKNOWN = 0,
-       F_REAL = 10,
-       F_SHORT = 20,
-       F_CHAR = 30,
-       F_INT = 40,
-       F_BYTE = 50,
-       F_WORD = 60
-};
-
-enum EFeatureClass
-{
-       C_UNKNOWN = 0,
-       C_SIMPLE = 10,
-       C_SPARSE = 20,
-       C_STRING = 30,
-       C_COMBINED = 40
-};
-
-/// Alphabet of charfeatures/observations
-enum E_ALPHABET
-{
-       /// DNA - letters A,C,G,T,*,N,n
-       DNA=0,
-
-       /// PROTEIN - letters a-z
-       PROTEIN=1,
-
-       /// ALPHANUM - [0-9a-z]
-       ALPHANUM=2,
-
-       /// CUBE - [1-6]
-       CUBE=3,
-
-       /// NONE - type has no alphabet
-       NONE=4
-};
-
-//@}
-
-#define TMP_DIR "/tmp/"
-//#define TMP_DIR "/short/x46/tmp/"
-
-#endif
diff --git a/QPalmaDP/config.h b/QPalmaDP/config.h
deleted file mode 100644 (file)
index 88b448b..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-
-#define HAVE_MATLAB 1
-
-#define HAVE_LARGEFILE 1
-
-
-
-
-
-
-#define USE_BIGSTATES 1
-
-
-#define USE_HMMCACHE 1
-
-
-
-
-
diff --git a/QPalmaDP/debug_tools.h b/QPalmaDP/debug_tools.h
deleted file mode 100644 (file)
index 7243830..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef __DEBUG_TOOLS__
-#define __DEBUG_TOOLS__
-
-#include <cstdio>
-
-void fassert(bool exp,int line, char* file) {
-   if (! exp) {
-      printf("invalid fassert at line %d in file %s!\n",line,file);
-      exit(EXIT_FAILURE);
-   }
-}
-
-#define FA(expr) (fassert(expr,__LINE__,__FILE__))
-
-#endif // __DEBUG_TOOLS__
diff --git a/QPalmaDP/fill_matrix.cpp b/QPalmaDP/fill_matrix.cpp
deleted file mode 100644 (file)
index 1425ade..0000000
+++ /dev/null
@@ -1,485 +0,0 @@
-/*********************************************************************************************/
-// Version of fill_matrix with fills several alignment matrices (best, second_best ...)
-// nr_paths det. number of these matrices
-//
-// est_len: Number of columns in alignment matrix: length(EST) +1 (for the gap) (variable: i)
-// dna_len: Number of lines in alignment matrix: length(DNA) +1 (for the gap) (variable: j)
-//
-// !!! IMPORTANT !!! The pictures and equations in the paper/poster etc. describe the algorithm
-// for an alignment matrix where DNA and EST changed places !!! IMPORTANT !!!
-/*********************************************************************************************/
-
-/*
-  matchmatrix: columnwise
-    - A C G T N (dna)
-  - x x x x x x
-  A x x x x x x        
-  C x x x x x x
-  G x x x x x x
-  T x x x x x x
-  N x x x x x x
-(est)
-*/
-
-/*
-  alignment matrix: columnwise
-   |-EST . . .
-  -+------------
-  -|00 ...
-  D|0.
-  N|. .
-  A|.  .
-  .|.
-  .|
-  .|
-*/
-
-
-/* TODO
-   - max_len = 10000 for elegans!
-   - do not use double
-*/
-
-#include "fill_matrix.h"
-
-void fassert(bool exp,int line, char* file);
-#define FA(expr) (fassert(expr,__LINE__,__FILE__))
-
-using namespace std;
-
-int check_char(char base)
-{
-  switch(base)
-  {
-  case 'A':
-  case 'a': return 1;
-  case 'C':    
-  case 'c': return 2;
-  case 'G':
-  case 'g': return 3;
-  case 'T':
-  case 't': return 4;
-  case 'N':
-  case 'n': return 5; 
-  //default : return 0;
-  default : return -1;
-  }
-  
-}
-
- /* system of Palma. Otherwise scoring_functions should point to an array of
- * plifs scoring the respective pairs of characters together with the EST
- * quality score.
- */
-
-double getScore(struct penalty_struct* qualityScores, int mlen, int dnaChar, int estChar, double baseScore) {
-   double score = 0.0;
-   FA(estChar > 0 && estChar < 6);
-   FA(dnaChar > -1 && dnaChar < 6);
-
-   //printf("Starting scoring / scheme is USE_QUALITY_SCORES\n");
-   //printf("estChar,dnaChar are: %d,%d,%d",estChar,dnaChar);
-   
-   int currentPos = (estChar-1)*6+dnaChar;
-   struct penalty_struct currentPen = qualityScores[currentPos];
-
-       INT idx = 0 ;
-       for (INT i=0; i<currentPen.len; i++)
-               if (currentPen.limits[i] <= baseScore)
-                       idx++ ;
-       
-       if (idx==0)
-               score=currentPen.penalties[0] ;
-       else if (idx==currentPen.len)
-               score=currentPen.penalties[currentPen.len-1] ;
-       else
-       {
-               score = (currentPen.penalties[idx]*(baseScore-currentPen.limits[idx-1]) + currentPen.penalties[idx-1]*
-                          (currentPen.limits[idx]-baseScore)) / (currentPen.limits[idx]-currentPen.limits[idx-1]) ;  
-   }
-
-   //score = lookup_penalty(&currentPen,idx, NULL, 0);
-   //if(baseScore <= currentPen.limits[0]) {
-   //   score = currentPen.penalties[0];
-   //} else {
-   //   int sp_idx;
-   //   for(sp_idx=0;sp_idx<currentPen.len;sp_idx++) {
-   //      if(currentPen.limits[sp_idx] <= baseScore )
-   //         score = currentPen.penalties[sp_idx];
-   //   //printf("Current score is %f\n",score);
-   //   }
-   //}
-
-   return score;
-}
-
-void fill_matrix(int nr_paths, Pre_score* matrices[], int est_len /*counter var: i*/, int dna_len /*counter var: j*/, char* est, char* dna, double* prb, penalty_struct* functions , double* matchmatrix, penalty_struct* qualityScores, double* donor, double* acceptor, bool remove_duplicate_scores, int nr_donor_sites, int* donor_sites, int* max_score_positions, mode currentMode)
-{      
-  //printf("Entering fill_matrix...\n");
-
-  /*********************************************************************************************/
-  // variables
-  /*********************************************************************************************/
-  const int mlen = 6; // length of matchmatrix
-  int estChar, dnaChar; // 0:'-', 1:'a', 2:'c', 3:'g', 4:'t', 5:'n'
-
-  double baseScore;
-  double *est_scores = prb ;
-
-  double prevValue ;
-  double tempValue;
-  int max_len = (int)functions->limits[functions->len-1] ; //last (len) supporting point of h
-  //printf("max_len: %d\n", max_len) ;
-
-  int num_elem ; // counter variable
-
-  const int max_num_elem = (4+nr_paths+max_len) * nr_paths ; // upper bound on num_elem
-  double *scores=new double[max_num_elem] ; //are sorted later
-  struct ArrayElem *array=new ArrayElem[max_num_elem] ;
-  //printf("%d\n", max_num_elem*sizeof(ArrayElem)) ;
-  
-  int max_ss_element[nr_paths*nr_paths]; //position on dna with max ss score for introns of length greater than max_len
-  int splice_pos ; //splice position get from donor_sites
-  int start_splice_pos ; //where to start looking in donor_sites
-
-  double max_scores[nr_paths] ;
-
-  /*********************************************************************************************/
-  // Initialisation of all alignment matrices, columnwise
-  /*********************************************************************************************/
-  for (int z=0; z<nr_paths; z++) /* matrice z */
-  {
-    max_scores[z] = log(0.0) ;
-    max_score_positions[2*z]    = 0; // pos i
-    max_score_positions[2*z +1] = 0; // pos j
-
-    for (int i=0; i<est_len; i++) /* i.column */
-    {
-      for (int j=0; j<dna_len; j++) /* j.line */
-      {
-       ((Pre_score*)matrices[z] +(i*dna_len) +j)->value = log(0.0); // -inf
-       ((Pre_score*)matrices[z] +(i*dna_len) +j)->prev_i = 0;
-       ((Pre_score*)matrices[z] +(i*dna_len) +j)->prev_j = 0;
-       ((Pre_score*)matrices[z] +(i*dna_len) +j)->prev_matrix_no = 0;
-       ((Pre_score*)matrices[z] +(i*dna_len) +j)->issplice = 0;
-      }
-    }
-  }
-
-  /*********************************************************************************************/      
-  // Element (0,0) for first/best matrice (z=0)
-  /*********************************************************************************************/       
-  ((Pre_score*)matrices[0])->value = 0;
-  ((Pre_score*)matrices[0])->prev_i = 0;
-  ((Pre_score*)matrices[0])->prev_j = 0;
-  ((Pre_score*)matrices[0])->prev_matrix_no = 0;
-  ((Pre_score*)matrices[0])->issplice = 0;
-
-  /*********************************************************************************************/      
-  // Line Zero for first/best matrice (z=0)
-  /*********************************************************************************************/
-  for (int i=1; i<est_len; i++) /* 0.line, all columns (DNA='-') */
-  {    
-    ((Pre_score*)matrices[0] +(i*dna_len))->value = 0 ;                
-    ((Pre_score*)matrices[0] +(i*dna_len))->prev_i = i-1;
-    ((Pre_score*)matrices[0] +(i*dna_len))->prev_j = 0;    
-    ((Pre_score*)matrices[0] +(i*dna_len))->prev_matrix_no = 0;
-    ((Pre_score*)matrices[0] +(i*dna_len))->issplice = 0;
-  }
-
-
-  /*********************************************************************************************/      
-  // Column Zero for first/best matrice (z=0)
-  /*********************************************************************************************/      
-  for (int j=1; j<dna_len; j++) // 0.column, all lines
-  {      
-    ((Pre_score*)matrices[0] +j)->value =  0 ;
-    ((Pre_score*)matrices[0] +j)->prev_i = 0 ;
-    ((Pre_score*)matrices[0] +j)->prev_j = j-1 ;
-    ((Pre_score*)matrices[0] +j)->prev_matrix_no = 0 ;
-    ((Pre_score*)matrices[0] +j)->issplice = 0;
-  }
-
-   
-  /*********************************************************************************************/      
-  // !!!! Column Zero and line Zero for all other matrices except the best one (z>0): -INF !!!! 
-  /*********************************************************************************************/      
-
-
-   //printf("after initialization\n");
-  
-  /*********************************************************************************************/
-  // all other entries for all matrices, last column and row treated seperately
-  /*********************************************************************************************/
-  
-  /* columnwise, 0.column and 0.line already filled */
-  for (int i=1; i<est_len; i++) /* i.column */
-  {
-    
-    start_splice_pos = 0 ; //where to start looking in donor_sites
-    for (int zz=0; zz<nr_paths*nr_paths; zz++) {max_ss_element[zz]=0 ;} // initializing
-  
-    for (int j=1; j<dna_len; j++) //j.line
-    {
-      dnaChar = check_char(dna[j-1]) ; // -1 because of of the gaps in the 0.line/column of the al-matrix
-      estChar = check_char(est[i-1]) ;
-      baseScore = est_scores[i-1];
-
-      //printf("at iteration %d %d\n",i,j);
-
-      //if introns of length greater than max_len possible
-      if (j > max_len)
-      {
-       //storing highest ss_score (the position) and NOT -INF
-       int new_ss_pos = j-max_len ;
-       double donor_score = donor[new_ss_pos-1] ;
-       if (isfinite(donor_score)) //splice site!
-        {
-         if (max_ss_element[nr_paths-1] != 0)
-          { 
-           for (int zz=0; zz<nr_paths; zz++) //for each alignment matrix
-            {
-             int pos_with_minscore = nr_paths ; //assuming, the new score is the worst score
-             double new_intron_score = donor_score+((Pre_score*)matrices[zz]+(i*dna_len)+new_ss_pos-1)->value ; //could be -INF
-             if (!(isfinite(new_intron_score))) {printf("new_intron_score is -INF\n") ;}
-             for (int pos=0; pos<nr_paths; pos++) //find worst score of all nr_paths+1 scores
-              {
-               int act_splice_position = max_ss_element[zz*nr_paths+pos] ;
-               assert(isfinite(donor[act_splice_position-1])) ;
-               double act_intron_score = donor[act_splice_position-1]+((Pre_score*)matrices[zz]+(i*dna_len)+act_splice_position-1)->value ;
-               if (new_intron_score >= act_intron_score) 
-                {
-                 new_intron_score = act_intron_score ;
-                 pos_with_minscore = pos ;
-               } //else nothing happens, new intron score still the worst one
-             }
-             if (pos_with_minscore != nr_paths)
-              {
-               max_ss_element[zz*nr_paths+pos_with_minscore] = new_ss_pos ; //replacing
-             }
-           }
-         }
-         else
-          {
-           for (int pos=0; pos<nr_paths; pos++)
-            {
-             if (max_ss_element[pos]==0)  //no previous splice site at this pos
-              {
-               for (int zz=0; zz<nr_paths; zz++) {max_ss_element[zz*nr_paths+pos]=j-max_len ;}
-               break ;
-             }
-           } 
-         }
-       }
-       //start_pos in donor_sites so that only introns shorter then max_length are seen
-       if ((start_splice_pos < nr_donor_sites) && donor_sites[start_splice_pos] == j-max_len)
-        {
-         start_splice_pos++ ;
-       }
-      }
-      
-
-      /* Computing all possible scores */
-      num_elem = 0 ; /* number of different scores/possibilities so far */
-      for (int z=0; z<nr_paths; z++) /* matrice z */
-      {
-       Pre_score* actMatrix = (Pre_score*)matrices[z]; /* hm, why?  */
-
-       /***************************************/
-       // 0. Zero (local alignment version)
-        /***************************************/
-       assert(num_elem<max_num_elem) ;
-       array[num_elem].prev_i = 0; 
-       array[num_elem].prev_j = 0; 
-       array[num_elem].prev_matrix_no = z;
-       array[num_elem].isSplice = false; /* no splice site */
-       scores[num_elem] = 0 ;
-       num_elem++ ;
-
-
-        /***************************************/
-       // 1. gap on EST (j-1,i) -> (j,i) 
-        /***************************************/
-       prevValue = ((Pre_score*)actMatrix +(i*dna_len)+j-1)->value ; /* (j-1,i) -> (j,i) */
-       if (isfinite(prevValue))
-       {
-
-      if( currentMode == USE_QUALITY_SCORES ) {
-         tempValue = prevValue + matchmatrix[dnaChar];
-      } else {
-             tempValue = prevValue + matchmatrix[mlen*dnaChar];   /* score(gap,DNA) */
-      }
-
-
-         if (isfinite(tempValue))
-         {
-           assert(num_elem<max_num_elem) ;
-           array[num_elem].prev_i = i; 
-           array[num_elem].prev_j = j-1; /* predecessor */
-           array[num_elem].prev_matrix_no = z;
-           array[num_elem].isSplice = false; /* no splice site */
-           scores[num_elem] = -tempValue ;
-           num_elem++ ;
-         }
-       }
-      
-
-       /***************************************/
-       // 2. match/mismatch (j-1,i-1) -> (j,i)
-       /***************************************/
-       prevValue = ((Pre_score*)actMatrix +(i-1)*dna_len + j-1)->value ; /* (j-1,i-1) -> (j,i) */
-       if (isfinite(prevValue))
-       {
-      if( currentMode == USE_QUALITY_SCORES ) {
-        tempValue = prevValue + getScore(qualityScores,mlen,dnaChar,estChar,baseScore);
-      } else {
-        tempValue = prevValue +(matchmatrix[mlen* dnaChar +estChar]);
-     }
-
-         if (isfinite(tempValue))
-         {
-           assert(num_elem<max_num_elem) ;
-           array[num_elem].prev_i = i-1; /* predecessor */
-           array[num_elem].prev_j = j-1; /* predecessor */
-           array[num_elem].prev_matrix_no = z;
-           array[num_elem].isSplice = false; /* no splice site */
-           scores[num_elem] = -tempValue ;
-           num_elem++ ;
-         }
-       }
-       
-
-       /***************************************/
-       // 3. gap on DNA (j,i-1) -> (j,i)
-       /***************************************/
-       prevValue = ((Pre_score*)actMatrix +(i-1)*dna_len + j)->value ;
-       if (isfinite(prevValue))
-       {
-
-      if( currentMode == USE_QUALITY_SCORES ) {
-        tempValue = prevValue + getScore(qualityScores,mlen,0,estChar,baseScore);
-      } else {
-            tempValue = prevValue +(matchmatrix[estChar]); /* score(EST,gap) */
-     }
-
-         if (isfinite(tempValue))
-         {
-           assert(num_elem<max_num_elem) ;
-           array[num_elem].prev_i = i-1; /* predecessor */
-           array[num_elem].prev_j = j; 
-           array[num_elem].prev_matrix_no = z;
-           array[num_elem].isSplice = false;  /* no splice site */
-           scores[num_elem]=-tempValue ;
-           num_elem++ ;
-         }
-       }
-
-       /***************************************/
-       // 4. all possible splice sites/introns
-       /***************************************/
-       // acceptor[j-1]: g of ag (-1 because cpp starts counting at 0)
-       // donor[splice_pos-1]: g of gt/gc (-1 because cpp starts counting at 0)
-       // donor_sites: the g's of the gt/gc
-       
-       if (isfinite(acceptor[j-1]))
-       {
-         
-         //the nr_paths many introns of length greater than max_len
-         for (int pos=0; pos<nr_paths; pos++)
-          {
-           if (max_ss_element[z*nr_paths+pos]>0)
-            {
-             assert((j-max_ss_element[z*nr_paths+pos]+1) > max_len) ;
-             splice_pos = max_ss_element[z*nr_paths+pos] ; //the g of the gt 
-             prevValue = ((Pre_score*)actMatrix + (i*dna_len) +splice_pos-1)->value ; //one before the _gt
-             if (isfinite(prevValue) && (((Pre_score*)actMatrix + (i*dna_len) +splice_pos-1)->issplice!=1))
-              {
-               int dist = j -(splice_pos-1); //_gt...ag: dist from g to g
-               assert(dist>max_len) ;
-               tempValue = lookup_penalty(functions,dist, NULL, 0) +donor[splice_pos-1] +acceptor[j-1] +prevValue;
-               if (isfinite(tempValue))
-                {
-                 assert(num_elem<max_num_elem) ;
-                 array[num_elem].prev_i = i; 
-                 array[num_elem].prev_j = splice_pos-1; // predecessor
-                 array[num_elem].prev_matrix_no = z;
-                 array[num_elem].isSplice = true; // splice site
-                 scores[num_elem]=-tempValue ;
-                 num_elem++ ;
-               }
-               else if (isfinite(lookup_penalty(functions,dist, NULL, 0)))
-                {
-                 printf("Something wrong with donor_sites(1000er) (intron_length: %d)\n", dist) ;
-                 sleep(5000) ;
-               }
-             }
-           }
-         }
-
-          //all introns of length <= max_len
-          for (int k=start_splice_pos; k < nr_donor_sites; k++) 
-         //for (int k=0; k < nr_donor_sites; k++) //also intron of length greater than 1000
-         {   
-           splice_pos = donor_sites[k] ;
-           if (splice_pos > j-4) { break ;}
-           prevValue = ((Pre_score*)actMatrix + (i*dna_len) +splice_pos-1)->value ; //one before the _gt
-           
-           if (isfinite(prevValue) && (((Pre_score*)actMatrix + (i*dna_len) +splice_pos-1)->issplice!=1))
-           {
-             int dist = j -(splice_pos-1); //_gt...ag: dist from g to g
-             assert(dist<=max_len) ;
-             tempValue = lookup_penalty(functions,dist, NULL, 0) +donor[splice_pos-1] +acceptor[j-1] +prevValue;
-             if (isfinite(tempValue))
-             {
-               assert(num_elem<max_num_elem) ;
-               array[num_elem].prev_i = i; 
-               array[num_elem].prev_j = splice_pos-1; /* predecessor */
-               array[num_elem].prev_matrix_no = z;
-               array[num_elem].isSplice = true; /* splice site */
-               scores[num_elem]=-tempValue ;
-               num_elem++ ;
-             }
-             else if (isfinite(lookup_penalty(functions,dist, NULL, 0)))
-             {
-               printf("Something wrong with donor_sites (intron_length: %d)\n", dist) ;
-               sleep(5000) ;
-             }
-           }
-         }
-       }
-
-      } //end of z
-      
-      CMath::nmin<struct ArrayElem>(scores, array, num_elem, nr_paths);
-      
-      /* the nr_paths many highest scores are written in the nr_paths many matrices*/
-      for (int z=0; z<nr_paths; z++) {
-       if (z>=num_elem) {
-         //nothing happens, entry in matrices[z] at pos. (i,j) still -inf
-       }
-       else {
-         ((Pre_score*)matrices[z] + (i*dna_len) +j)->value = -scores[z] ;
-         ((Pre_score*)matrices[z] + (i*dna_len) +j)->prev_i = array[z].prev_i ;
-         ((Pre_score*)matrices[z] + (i*dna_len) +j)->prev_j = array[z].prev_j ;
-         ((Pre_score*)matrices[z] + (i*dna_len) +j)->issplice = array[z].isSplice ;
-         ((Pre_score*)matrices[z] + (i*dna_len) +j)->prev_matrix_no = array[z].prev_matrix_no;
-
-         if (-scores[z]>=max_scores[z]) { //storing the postion of the highest alignment score yet
-           max_scores[z] = -scores[z] ;
-           max_score_positions[2*z]    = i; // pos i
-           max_score_positions[2*z +1] = j; // pos j
-         }
-       }
-      } //end of z
-
-    } //end of j
-  } // end of i
-
-  //free memory
-  delete[] array;
-  delete[] scores;
-
-  //printf("Leaving fill_matrix...\n");
-  return;
-}
diff --git a/QPalmaDP/fill_matrix.h b/QPalmaDP/fill_matrix.h
deleted file mode 100644 (file)
index 9432c4e..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-#ifndef _FILLMATRIX_H__
-#define _FILLMATRIX_H__
-
-#include "Mathmatics.h"
-#include "penalty_info.h"
-#include <algorithm> //Eigentlich in Header
-#include <assert.h>
-#include <ctype.h>
-#include <iostream>
-#include <math.h>
-#include <stdlib.h> 
-#include <string.h>
-
-
-struct ArrayElem { //24B
-  double score;
-  int prev_j;
-  int prev_i;
-  int prev_matrix_no;   
-  bool isSplice;
-} ;
-
-struct pre_score { //24B
-  double value; //8
-  int prev_i; //4
-  int prev_j; //4
-  int prev_matrix_no;//4
-  bool issplice;//4
-};
-
-typedef struct pre_score Pre_score;
-
-typedef struct Align_pair { //24B
-  double value ; 
-  bool issplice;
-  int previ,prevj ;
-  char est_char;
-  char dna_char;
-};
-
-
-void fill_matrix (int nr_paths, Pre_score* matrices[], int len_est, int len_dna, char* est, char* dna, double* prb, penalty_struct* functions, double* matchmatrix, penalty_struct* qualityScores, double* donor, double* acceptor, bool remove_duplicate_scores, int nr_donor_sites, int* donor_sites, int* max_score_positions);
-// funktion aendern in void result_align(Pre_score* matrices[], int anzpath,
-
-int check_char(char base);
-
-bool result_align (Pre_score* matrices[], int matrixnr, int i,  int j, int* result_length_ptr, char* est, char* dna, int* s_align, int* e_align, int* m_param, double* alignmentscores, int* max_score_positions );
-
-extern void print_align(Pre_score* matrix, int length_est,  int length_dna, Align_pair* vektor, int result_length, int print_matrix);
-
-double getScore(double *matchmatrix,int mlen, int dnaChar, int estChar);
-
-
-double scoreMatch(int dnaChar, int estChar, double baseScore);
-
-
-#endif // _FILLMATRIX_H__
diff --git a/QPalmaDP/io.cpp b/QPalmaDP/io.cpp
deleted file mode 100644 (file)
index 5ae3e1c..0000000
+++ /dev/null
@@ -1,96 +0,0 @@
-#include "io.h"
-#include "common.h"
-
-#include <stdio.h>
-#include <stdarg.h>
-#include <ctype.h>
-
-FILE* CIO::target=stdout;
-
-CIO::CIO()
-{
-}
-
-void CIO::message(EMessageType prio, const CHAR *fmt, ... )
-{
-       check_target();
-       print_message_prio(prio, target);
-    va_list list;
-    va_start(list,fmt);
-    vfprintf(target,fmt,list);
-    va_end(list);
-    fflush(target);
-}
-
-void CIO::buffered_message(EMessageType prio, const CHAR *fmt, ... )
-{
-       check_target();
-       print_message_prio(prio, target);
-    va_list list;
-    va_start(list,fmt);
-    vfprintf(target,fmt,list);
-    va_end(list);
-}
-
-CHAR* CIO::skip_spaces(CHAR* str)
-{
-       INT i=0;
-
-       if (str)
-       {
-               for (i=0; isspace(str[i]); i++);
-
-               return &str[i];
-       }
-       else 
-               return str;
-}
-
-void CIO::set_target(FILE* t)
-{
-       target=t;
-}
-
-void CIO::check_target()
-{
-       if (!target)
-               target=stdout;
-}
-
-void CIO::print_message_prio(EMessageType prio, FILE* target)
-{
-       switch (prio)
-       {
-               case M_DEBUG:
-                       fprintf(target, "[DEBUG] ");
-                       break;
-               case M_PROGRESS:
-                       fprintf(target, "[PROGRESS] ");
-                       break;
-               case M_INFO:
-                       //fprintf(target, "[INFO] ");
-                       break;
-               case M_NOTICE:
-                       fprintf(target, "[NOTICE] ");
-                       break;
-               case M_WARN:
-                       fprintf(target, "[WARN] ");
-                       break;
-               case M_ERROR:
-                       fprintf(target, "[ERROR] ");
-                       break;
-               case M_CRITICAL:
-                       fprintf(target, "[CRITICAL] ");
-                       break;
-               case M_ALERT:
-                       fprintf(target, "[ALERT] ");
-                       break;
-               case M_EMERGENCY:
-                       fprintf(target, "[EMERGENCY] ");
-                       break;
-               case M_MESSAGEONLY:
-                       break;
-               default:
-                       break;
-       }
-}
diff --git a/QPalmaDP/io.h b/QPalmaDP/io.h
deleted file mode 100644 (file)
index c127f92..0000000
+++ /dev/null
@@ -1,33 +0,0 @@
-#ifndef __CIO_H__
-#define __CIO_H__
-
-#include "common.h"
-
-#include <stdio.h>
-#include <stdarg.h>
-
-class CIO
-{
-public:
-       CIO();
-
-       static void set_target(FILE* target);
-       static void message(EMessageType prio, const CHAR *fmt, ... );
-
-       inline static void not_implemented() 
-       {
-               message(M_ERROR, "Sorry, not yet implemented\n");
-       }
-
-       static void buffered_message(EMessageType prio, const CHAR *fmt, ... );
-
-       static CHAR* skip_spaces(CHAR* str);
-
-protected:
-       static void check_target();
-       static void print_message_prio(EMessageType prio, FILE* target);
-
-protected:
-       static FILE* target;
-};
-#endif
diff --git a/QPalmaDP/penalty_info.cpp b/QPalmaDP/penalty_info.cpp
deleted file mode 100644 (file)
index d5d739f..0000000
+++ /dev/null
@@ -1,368 +0,0 @@
-#include <assert.h>
-#include "config.h"
-//#include "features/CharFeatures.h"
-//#include "features/StringFeatures.h"
-
-#include <stdio.h>
-#include <string.h>
-
-#include "io.h"
-
-
-#include "fill_matrix.h"
-#include "penalty_info.h"
-
-void init_penalty_struct(struct penalty_struct &PEN)
-{
-       PEN.limits=NULL ;
-       PEN.penalties=NULL ;
-       PEN.id=-1 ;
-       PEN.next_pen=NULL ;
-       PEN.transform = T_LINEAR ;
-       PEN.name = NULL ;
-       PEN.max_len=0 ;
-       PEN.min_len=0 ;
-       PEN.cache=NULL ;
-       PEN.use_svm=0 ;
-}
-
-void init_penalty_struct_cache(struct penalty_struct &PEN)
-{
-       if (PEN.cache || PEN.use_svm)
-               return ;
-               
-       REAL* cache=new REAL[PEN.max_len+1] ;
-       if (cache)
-       {
-               for (INT i=0; i<=PEN.max_len; i++)
-                       cache[i] = lookup_penalty(&PEN, i, 0, false) ;
-               PEN.cache = cache ;
-       }
-}
-
-void delete_penalty_struct_palma(struct penalty_struct &PEN)
-{
-       if (PEN.id!=-1)
-       {
-               delete[] PEN.limits ;
-               delete[] PEN.penalties ;
-               delete[] PEN.name ;
-               delete[] PEN.cache ;
-       }
-}
-
-void delete_penalty_struct_array(struct penalty_struct *PEN, INT len)
-{
-       for (int i=0; i<len; i++)
-               delete_penalty_struct_palma(PEN[i]) ;
-       delete[] PEN ;
-}
-
-void copy_penalty_struct(struct penalty_struct *old, struct penalty_struct *newp) {
-   newp->len       = old->len;
-   newp->limits    = old->limits;
-   newp->penalties = old->penalties;
-   newp->max_len   = old->max_len;
-   newp->min_len   = old->min_len;
-   newp->cache     = old->cache;
-   newp->transform = old->transform;
-   newp->id        = old->id;
-   newp->next_pen  = old->next_pen;
-   newp->name      = old->name;
-   newp->use_svm   = old->use_svm;
-}
-
-
-//struct penalty_struct * read_penalty_struct_from_cell(const mxArray * mx_penalty_info, int &P)
-//{
-//     P = mxGetN(mx_penalty_info) ;
-//     
-//     struct penalty_struct * PEN = new struct penalty_struct[P] ;
-//     for (INT i=0; i<P; i++)
-//              init_penalty_struct(PEN[i]) ;
-//     
-//     for (INT i=0; i<P; i++)
-//     {
-//       const mxArray* mx_elem = mxGetCell(mx_penalty_info, i) ;
-//       if (mx_elem==NULL || !mxIsStruct(mx_elem))
-//         {
-//           CIO::message(M_ERROR, "empty cell element\n") ;
-//           delete_penalty_struct_array(PEN,P) ;
-//           return NULL ;
-//         } ;
-//       const mxArray* mx_id_field = mxGetField(mx_elem, 0, "id") ;
-//       if (mx_id_field==NULL || !mxIsNumeric(mx_id_field) || 
-//           mxGetN(mx_id_field)!=1 || mxGetM(mx_id_field)!=1)
-//         {
-//           CIO::message(M_ERROR, "missing id field\n") ;
-//           delete_penalty_struct_array(PEN,P) ;
-//           return NULL ;
-//         }
-//       const mxArray* mx_limits_field = mxGetField(mx_elem, 0, "limits") ;
-//       if (mx_limits_field==NULL || !mxIsNumeric(mx_limits_field) ||
-//           mxGetM(mx_limits_field)!=1)
-//         {
-//           CIO::message(M_ERROR, "missing limits field\n") ;
-//           delete_penalty_struct_array(PEN,P) ;
-//           return NULL ;
-//         }
-//       INT len = mxGetN(mx_limits_field) ;
-//       
-//       const mxArray* mx_penalties_field = mxGetField(mx_elem, 0, "penalties") ;
-//       if (mx_penalties_field==NULL || !mxIsNumeric(mx_penalties_field) ||
-//           mxGetM(mx_penalties_field)!=1 || mxGetN(mx_penalties_field)!=len)
-//         {
-//           CIO::message(M_ERROR, "missing penalties field\n") ;
-//           delete_penalty_struct_array(PEN,P) ;
-//           return NULL ;
-//         }
-//       const mxArray* mx_transform_field = mxGetField(mx_elem, 0, "transform") ;
-//       if (mx_transform_field==NULL || !mxIsChar(mx_transform_field))
-//         {
-//           CIO::message(M_ERROR, "missing transform field\n") ;
-//           delete_penalty_struct_array(PEN,P) ;
-//           return NULL ;
-//         }
-//       const mxArray* mx_name_field = mxGetField(mx_elem, 0, "name") ;
-//       if (mx_name_field==NULL || !mxIsChar(mx_name_field))
-//         {
-//           CIO::message(M_ERROR, "missing name field\n") ;
-//           delete_penalty_struct_array(PEN,P) ;
-//           return NULL ;
-//         }
-//       const mxArray* mx_max_len_field = mxGetField(mx_elem, 0, "max_len") ;
-//       if (mx_max_len_field==NULL || !mxIsNumeric(mx_max_len_field) ||
-//           mxGetM(mx_max_len_field)!=1 || mxGetN(mx_max_len_field)!=1)
-//         {
-//           CIO::message(M_ERROR, "missing max_len field\n") ;
-//           delete_penalty_struct_array(PEN,P) ;
-//           return NULL ;
-//         }
-//       const mxArray* mx_min_len_field = mxGetField(mx_elem, 0, "min_len") ;
-//       if (mx_min_len_field==NULL || !mxIsNumeric(mx_min_len_field) ||
-//           mxGetM(mx_min_len_field)!=1 || mxGetN(mx_min_len_field)!=1)
-//         {
-//           CIO::message(M_ERROR, "missing min_len field\n") ;
-//           delete_penalty_struct_array(PEN,P) ;
-//           return NULL ;
-//         }
-//       const mxArray* mx_use_svm_field = mxGetField(mx_elem, 0, "use_svm") ;
-//       if (mx_use_svm_field==NULL || !mxIsNumeric(mx_use_svm_field) ||
-//           mxGetM(mx_use_svm_field)!=1 || mxGetN(mx_use_svm_field)!=1)
-//         {
-//           CIO::message(M_ERROR, "missing use_svm field\n") ;
-//           delete_penalty_struct_array(PEN,P) ;
-//           return NULL ;
-//         }
-//       INT use_svm = (INT) mxGetScalar(mx_use_svm_field) ;
-//             //fprintf(stderr, "use_svm_field=%i\n", use_svm) ;
-//       
-//       const mxArray* mx_next_id_field = mxGetField(mx_elem, 0, "next_id") ;
-//       if (mx_next_id_field==NULL || !mxIsNumeric(mx_next_id_field) ||
-//           mxGetM(mx_next_id_field)!=1 || mxGetN(mx_next_id_field)!=1)
-//         {
-//           CIO::message(M_ERROR, "missing next_id field\n") ;
-//           delete_penalty_struct_array(PEN,P) ;
-//           return NULL ;
-//         }
-//       INT next_id = (INT) mxGetScalar(mx_next_id_field)-1 ;
-//       
-//       INT id = (INT) mxGetScalar(mx_id_field)-1 ;
-//       if (i<0 || i>P-1)
-//         {
-//           CIO::message(M_ERROR, "id out of range\n") ;
-//           delete_penalty_struct_array(PEN,P) ;
-//           return NULL ;
-//         }
-//       INT max_len = (INT) mxGetScalar(mx_max_len_field) ;
-//       if (max_len<0 || max_len>1024*1024*100)
-//         {
-//           CIO::message(M_ERROR, "max_len out of range\n") ;
-//                     delete_penalty_struct_array(PEN,P) ;
-//                     return NULL ;
-//         }
-//       PEN[id].max_len = max_len ;
-//
-//       INT min_len = (INT) mxGetScalar(mx_min_len_field) ;
-//       if (min_len<0 || min_len>1024*1024*100)
-//         {
-//           CIO::message(M_ERROR, "min_len out of range\n") ;
-//                     delete_penalty_struct_array(PEN,P) ;
-//                     return NULL ;
-//         }
-//       PEN[id].min_len = min_len ;
-//       
-//       if (PEN[id].id!=-1)
-//         {
-//           CIO::message(M_ERROR, "penalty id already used\n") ;
-//           delete_penalty_struct_array(PEN,P) ;
-//           return NULL ;
-//         }
-//       PEN[id].id=id ;
-//       if (next_id>=0)
-//         PEN[id].next_pen=&PEN[next_id] ;
-//       //fprintf(stderr,"id=%i, next_id=%i\n", id, next_id) ;
-//       
-//       assert(next_id!=id) ;
-//       PEN[id].use_svm=use_svm ;
-//       PEN[id].limits = new REAL[len] ;
-//       PEN[id].penalties = new REAL[len] ;
-//       double * limits = mxGetPr(mx_limits_field) ;
-//       double * penalties = mxGetPr(mx_penalties_field) ;
-//       
-//       for (INT i=0; i<len; i++)
-//         {
-//           PEN[id].limits[i]=limits[i] ;
-//           PEN[id].penalties[i]=penalties[i] ;
-//         }
-//       PEN[id].len = len ;
-//       
-//       char *transform_str = mxArrayToString(mx_transform_field) ;                           
-//       char *name_str = mxArrayToString(mx_name_field) ;                             
-//       
-//       if (strcmp(transform_str, "log")==0)
-//         PEN[id].transform = T_LOG ;
-//       else if (strcmp(transform_str, "log(+1)")==0)
-//         PEN[id].transform = T_LOG_PLUS1 ;   
-//       else if (strcmp(transform_str, "log(+3)")==0)
-//         PEN[id].transform = T_LOG_PLUS3 ;   
-//       else if (strcmp(transform_str, "(+3)")==0)
-//         PEN[id].transform = T_LINEAR_PLUS3 ;        
-//       else if (strcmp(transform_str, "")==0)
-//         PEN[id].transform = T_LINEAR ;      
-//       else
-//         {
-//           delete_penalty_struct_array(PEN,P) ;
-//           mxFree(transform_str) ;
-//           return NULL ;
-//         }
-//       PEN[id].name = new char[strlen(name_str)+1] ;
-//       strcpy(PEN[id].name, name_str) ;
-//       
-//       init_penalty_struct_cache(PEN[id]) ;
-//         
-//       mxFree(transform_str) ;
-//       mxFree(name_str) ;
-//     }
-//     return PEN ;
-//}
-
-REAL lookup_penalty_svm(const struct penalty_struct *PEN, INT p_value, REAL *d_values)
-{      
-       if (PEN==NULL)
-               return 0 ;
-       assert(PEN->use_svm>0) ;
-       REAL d_value=d_values[PEN->use_svm-1] ;
-       //fprintf(stderr,"transform=%i, d_value=%1.2f\n", (INT)PEN->transform, d_value) ;
-       
-       switch (PEN->transform)
-       {
-       case T_LINEAR:
-               break ;
-       case T_LOG:
-               d_value = log(d_value) ;
-               break ;
-       case T_LOG_PLUS1:
-               d_value = log(d_value+1) ;
-               break ;
-       case T_LOG_PLUS3:
-               d_value = log(d_value+3) ;
-               break ;
-       case T_LINEAR_PLUS3:
-               d_value = d_value+3 ;
-               break ;
-       default:
-               CIO::message(M_ERROR, "unknown transform\n") ;
-               break ;
-       }
-       
-       INT idx = 0 ;
-       REAL ret ;
-       for (INT i=0; i<PEN->len; i++)
-               if (PEN->limits[i]<=d_value)
-                       idx++ ;
-       
-       if (idx==0)
-               ret=PEN->penalties[0] ;
-       else if (idx==PEN->len)
-               ret=PEN->penalties[PEN->len-1] ;
-       else
-       {
-               ret = (PEN->penalties[idx]*(d_value-PEN->limits[idx-1]) + PEN->penalties[idx-1]*
-                          (PEN->limits[idx]-d_value)) / (PEN->limits[idx]-PEN->limits[idx-1]) ;  
-       }
-       
-       //fprintf(stderr,"ret=%1.2f\n", ret) ;
-
-       if (PEN->next_pen)
-               ret+=lookup_penalty(PEN->next_pen, p_value, d_values);
-       
-       //fprintf(stderr,"ret=%1.2f\n", ret) ;
-       
-       return ret ;
-}
-
-REAL lookup_penalty(const struct penalty_struct *PEN, INT p_value, 
-                                       REAL* svm_values, bool follow_next)
-{      
-       if (PEN==NULL)
-               return 0 ;
-       if (PEN->use_svm)
-               return lookup_penalty_svm(PEN, p_value, svm_values) ;
-               
-       if ((p_value<PEN->min_len) || (p_value>PEN->max_len))
-               return -CMath::INFTY ;
-       
-       if (PEN->cache!=NULL && (p_value>=0) && (p_value<=PEN->max_len))
-       {
-               REAL ret=PEN->cache[p_value] ;
-               if (PEN->next_pen && follow_next)
-                       ret+=lookup_penalty(PEN->next_pen, p_value, svm_values);
-               return ret ;
-       }
-       
-       REAL d_value = (REAL) p_value ;
-       switch (PEN->transform)
-       {
-       case T_LINEAR:
-               break ;
-       case T_LOG:
-               d_value = log(d_value) ;
-               break ;
-       case T_LOG_PLUS1:
-               d_value = log(d_value+1) ;
-               break ;
-       case T_LOG_PLUS3:
-               d_value = log(d_value+3) ;
-               break ;
-       case T_LINEAR_PLUS3:
-               d_value = d_value+3 ;
-               break ;
-       default:
-               CIO::message(M_ERROR, "unknown transform\n") ;
-               break ;
-       }
-
-       INT idx = 0 ;
-       REAL ret ;
-       for (INT i=0; i<PEN->len; i++)
-               if (PEN->limits[i]<=d_value)
-                       idx++ ;
-       
-       if (idx==0)
-               ret=PEN->penalties[0] ;
-       else if (idx==PEN->len)
-               ret=PEN->penalties[PEN->len-1] ;
-       else
-       {
-               ret = (PEN->penalties[idx]*(d_value-PEN->limits[idx-1]) + PEN->penalties[idx-1]*
-                          (PEN->limits[idx]-d_value)) / (PEN->limits[idx]-PEN->limits[idx-1]) ;  
-       }
-       //if (p_value>=30 && p_value<150)
-       //fprintf(stderr, "%s %i(%i) -> %1.2f\n", PEN->name, p_value, idx, ret) ;
-       
-       if (PEN->next_pen && follow_next)
-               ret+=lookup_penalty(PEN->next_pen, p_value, svm_values);
-
-       return ret ;
-}
diff --git a/QPalmaDP/penalty_info.h b/QPalmaDP/penalty_info.h
deleted file mode 100644 (file)
index 30de4aa..0000000
+++ /dev/null
@@ -1,45 +0,0 @@
-#ifndef __PENALTY_INFO_H__
-#define __PENALTY_INFO_H__
-
-#include "common.h"
-#include "Mathmatics.h"
-
-enum ETransformType
-{
-       T_LINEAR,
-       T_LOG,
-       T_LOG_PLUS1,
-       T_LOG_PLUS3,
-       T_LINEAR_PLUS3
-}  ;
-
-struct penalty_struct
-{
-       INT len ;
-       REAL *limits ;
-       REAL *penalties ;
-       INT max_len ;
-       INT min_len ;
-       REAL *cache ;
-       enum ETransformType transform ;
-       INT id ;
-       struct penalty_struct *next_pen ;
-       char * name ;
-       INT use_svm ;
-} ;
-
-void init_penalty_struct(struct penalty_struct &PEN) ;
-void delete_penalty_struct_palma(struct penalty_struct &PEN) ;
-//void delete_penalty_struct_array(struct penalty_struct *PEN, INT len) ;
-void copy_penalty_struct(struct penalty_struct *old, struct penalty_struct *newp);
-
-//#ifdef HAVE_MATLAB
-//struct penalty_struct * read_penalty_struct_from_cell(const mxArray * mx_penalty_info, mwSize &P) ;
-//#endif
-
-REAL lookup_penalty(const struct penalty_struct *PEN, INT p_value, 
-                                       REAL* svm_values, bool follow_next=true) ;
-
-enum mode { NORMAL, USE_QUALITY_SCORES};
-
-#endif
diff --git a/QPalmaDP/print_align.cpp b/QPalmaDP/print_align.cpp
deleted file mode 100644 (file)
index 312cabc..0000000
+++ /dev/null
@@ -1,97 +0,0 @@
-#include "fill_matrix.h"
-using namespace std;
-
-void print_align(Pre_score* matrix, int length_est,  int length_dna,Align_pair* vektor, int result_length, int print_matrix)
-  
-{
-  //Druck der matrix
-  switch(print_matrix)
-    {
-    case 1:
-      {
-       cout <<"Matrix: \n";
-       
-       for (int k = 0; k<length_est; k++)//k Zeile, l Spalte
-         {
-           cout << "Zeile " << k << ":\t"; 
-           for (int l= 0; l<length_dna; l++)
-             {
-               cout << ((Pre_score*)matrix + (k*length_dna) + l)->value <<"\t";  
-             } 
-           cout <<"\n";
-         }
-       break;  
-      }
-      
-    case 2:
-      {
-       cout <<"Matrix: \n";
-       
-       for (int k = 0; k<length_est; k++)//k Zeile, l Spalte
-         {
-           cout << "Zeile " << k << ":\t"; 
-           for (int l= 0; l<length_dna; l++)
-             {
-               if (((((Pre_score*)matrix + (k*length_dna) + l)->prev_i) == k-1) && ((((Pre_score*)matrix + (k*length_dna) + l)->prev_j) == l-1))
-                 cout << "\\" <<((Pre_score*)matrix + (k*length_dna) + l)->value <<"\t";  
-               else if (((((Pre_score*)matrix + (k*length_dna) + l)->prev_i) == k) && ((((Pre_score*)matrix + (k*length_dna) + l)->prev_j) == l-1))
-                 cout << "<"<< ((Pre_score*)matrix + (k*length_dna) + l)->value <<"\t";  
-               else if (((((Pre_score*)matrix + (k*length_dna) + l)->prev_i) == k -1) && ((((Pre_score*)matrix + (k*length_dna) + l)->prev_j) == l))
-                 cout << "|"<< ((Pre_score*)matrix + (k*length_dna) + l)->value <<"\t";  
-               else             { cout << ((Pre_score*)matrix + (k*length_dna) + l)->value << "("; 
-               cout << ((Pre_score*)matrix + (k*length_dna) + l)->prev_i << "," << ((Pre_score*)matrix + (k*length_dna) + l)->prev_j << ")\t";  
-               }
-               
-             } 
-           cout <<"\n";
-         }
-        
-       cout << "length_est" << length_est << "\n";
-        cout << "length_dna" << length_dna << "\n";
-        cout << "result_length: " << result_length << "\n";
-       
-       
-       break;  
-      } 
-      
-    case 3:
-      {
-       //Druck des Alignment
-       
-       // cout << "Alignment\n" ;
-       int j = 0;
-       // cout << "resultlength: " << result_length <<"\n";
-       while (j<result_length) {
-         int pacman = j;       
-         cout << "D:(" <<j<<")\t";
-         for (int i=0; ((j<result_length) && (i<50)); i++)
-           {
-             cout << vektor[j].dna_char << " ";        
-             j++;      
-           }
-         
-         cout << "\n"; 
-         
-         cout << "E:(" <<pacman<<")\t";
-         for (int i=0;((pacman<result_length) && (i<50)); i++)
-           {
-             cout << vektor[pacman].est_char << " "; 
-             pacman++;
-           }
-         cout << "\n\n";       
-       }               
-       
-       //Ende Druck Alignment
-       break ;
-      }
-      
-    default:
-      {
-       cout <<"";
-      }
-    }
-  
-  
-  
-}
-
diff --git a/QPalmaDP/qpalma_dp.cpp b/QPalmaDP/qpalma_dp.cpp
deleted file mode 100644 (file)
index 7786745..0000000
+++ /dev/null
@@ -1,277 +0,0 @@
-#include "qpalma_dp.h"
-#include <cstring>
-
-using namespace std;
-
-/*[splice_align, est_align, weightMatch, alignmentscores, dnaest] = ...
-  myalign([nr_paths], dna, est, {h}, matchmatrix, donor, acceptor, remove_duplicate_scores, ...
-  print_matrix) */
-
-Alignment::Alignment(int numQPlifs, int numq, bool use_qscores) {
-      len = 0;
-      limits = 0;
-      penalties = 0;
-      max_len = 0;
-      min_len = 0;
-      cache = 0;
-      enum ETransformType transform = T_LINEAR;
-      id = 0;
-      name = 0;
-      use_svm = 0;
-
-      // set ptrs to zero first
-      splice_align         = 0;
-      est_align            = 0;
-      mmatrix_param        = 0;
-      alignmentscores      = 0;
-      qualityScoresAllPaths = 0;
-      mlen = 6; // score matrix: length of 6 for "- A C G T N"
-
-      numQualSuppPoints = numq;
-      numPlifs = numQPlifs;
-      use_quality_scores = use_qscores;
-
-      //printf("number of support points: %d\n",numQualSuppPoints);
-      //printf("number of plifs: %d\n",numPlifs );
-      FA( numQualSuppPoints >= 0 );
-      FA( numPlifs >= 0 );
-}
-
-void Alignment::getDNAEST(){}
-
-void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
-      int est_len_p, double* prb, double* chastity, struct penalty_struct h, double* matchmatrix, int mm_len,
-      double* donor, int d_len, double* acceptor, int a_len, struct penalty_struct* qualityScores, 
-      bool remove_duplicate_scores, bool print_matrix) {
-
-   // printf("Entering myalign...\n");
-   dna_len =  dna_len_p + 1 ;
-   est_len =  est_len_p + 1 ;
-   nr_paths = nr_paths_p;
-
-  /***************************************************************************/ 
-  // initialize alignment matrices and call fill_matrix()  
-  /***************************************************************************/
-
-  mode currentMode;
-  if (use_quality_scores) 
-   currentMode = USE_QUALITY_SCORES;
-  else
-   currentMode = NORMAL;
-
-    // dnaest
-   double *DNA_ARRAY = 0;
-   double *EST_ARRAY = 0;
-  
-  //possible donor positions
-  int nr_donor_sites = 0 ;
-  for (int ii=0; ii<d_len; ii++) {
-    if(isfinite(donor[ii])) {
-      nr_donor_sites++ ;
-    }
-  }
-
-  int* donor_sites = new int[nr_donor_sites];
-  int donor_idx = 0 ;
-  for (int ii=0; ii<d_len; ii++) {
-    if(isfinite(donor[ii])) {
-      donor_sites[donor_idx] = ii+1 ;
-      donor_idx++ ;
-    }  
-  }
-
-  int* max_score_positions = new int[nr_paths*2];
-
-  Pre_score** matrices = new Pre_score*[nr_paths];
-  for (int z=0; z<nr_paths; z++) {
-      matrices[z] = new Pre_score[dna_len * est_len];
-  }
-
-   //printf("qualityScores\n");
-   //for(int qidx=0;qidx<numPlifs;qidx++) {
-   //   penalty_struct p; 
-   //   p = qualityScores[qidx];
-   //   printf("%d ",qidx);
-   //   for(int pidx=0;pidx<p.len;pidx++)
-   //      printf("%f ",pidx,p.penalties[pidx]);
-   //   printf("\n");
-   //}
-  
-  //printf("calling fill_matrix...\n");
-  fill_matrix(nr_paths, matrices, est_len, dna_len, est, dna, prb, &h, matchmatrix, qualityScores, donor, acceptor, remove_duplicate_scores, nr_donor_sites, donor_sites, max_score_positions,currentMode);
-  //printf("after call to fill_matrix...\n");
-
-  /***************************************************************************/ 
-  // return arguments etc.
-  /***************************************************************************/
-  int result_length; //Eine Variable fuer alle Matrizen
-
-   splice_align_size = (dna_len-1)*nr_paths;
-   est_align_size = (est_len-1)*nr_paths;
-
-   int mmatrix_size;
-
-   if (currentMode == USE_QUALITY_SCORES) {
-      mmatrix_param_size = mlen*nr_paths;
-      mmatrix_size = mlen;
-   }
-
-   if (currentMode == NORMAL) {
-      mmatrix_param_size = (mlen*mlen)*nr_paths;
-      mmatrix_size = mlen*mlen;
-   }
-
-   alignmentscores_size = nr_paths; //alignment score for each path/matrix
-   numPathsPlifs = numPlifs*nr_paths; //alignment score for each path/matrix
-
-   splice_align         = new int[splice_align_size];
-   est_align            = new int[est_align_size];
-   mmatrix_param        = new int[mmatrix_param_size];
-   alignmentscores      = new double[nr_paths]; //alignment score for each path/matrix
-
-
-  // printf("before memset...\n");
-  memset((char*)splice_align, -1, (dna_len-1)*nr_paths*sizeof(int)); // fills splice_align with zeros
-  memset((char*)est_align, -1, (est_len-1)*nr_paths*sizeof(int)); // fills est_align with zeros
-  memset((char*)mmatrix_param, 0, mmatrix_size*nr_paths*sizeof(int)); //fills mmatrix_param with zeros
-  memset(alignmentscores, -1, nr_paths*sizeof(double)); //fills alignmentscores with zeros
-  //printf("after memset...\n");
-  
-  qualityScoresAllPaths= new penalty_struct*[nr_paths];
-
-  for (int z=0; z<nr_paths; z++) {
-    result_length = 0 ;
-    
-    int* s_align = splice_align + (dna_len-1)*z;  //pointer
-    int* e_align = est_align + (est_len-1)*z ;    //pointer
-    int* mparam  = mmatrix_param + mmatrix_size*z; //pointer
-
-   qualityScoresAllPaths[z] = new penalty_struct[numPlifs];
-
-   int qidx, pidx;
-   for(qidx=0;qidx<numPlifs;qidx++) {
-      penalty_struct p;
-      p.len = numQualSuppPoints;
-      p.limits = (REAL*) calloc(p.len,sizeof(REAL));
-
-      for(pidx=0;pidx<p.len;pidx++)
-         p.limits[pidx] = qualityScores[qidx%numPlifs].limits[pidx];
-
-      p.penalties = (REAL*) calloc(p.len,sizeof(REAL));
-      qualityScoresAllPaths[z][qidx] = p;
-   }
-
-   //penalty_struct* qparam = qualityScoresAllPaths + (numPlifs*z);
-
-    //printf("before call to result_align...\n");
-    bool no_more_path = result_align(matrices, z, est_len, dna_len, &result_length, est, dna, prb, chastity, s_align, e_align, mparam, alignmentscores, max_score_positions, qualityScoresAllPaths[z] , currentMode);
-    //printf("after call to result_align...\n");
-
-   //printf("z is %d\n",z);
-   //for(qidx=0;qidx<numPlifs;qidx++) {
-   //   penalty_struct p;
-   //   p = qualityScoresAllPaths[qidx+numPlifs*z];
-   //   printf("%d ",qidx);
-   //   for(pidx=0;pidx<p.len;pidx++)
-   //      printf("%f ",pidx,p.penalties[pidx]);
-   //   printf("\n");
-   //}
-
-   //if(DNA_ARRAY != 0) {
-   //    delete[] DNA_ARRAY;
-   //    delete[] EST_ARRAY;
-   // }
-
-   //DNA_ARRAY = new double[result_length];
-   //EST_ARRAY = new double[result_length];
-
-   // //backtracking
-   // int i = max_score_positions[2*z] ; //i (est)
-   // int j = max_score_positions[2*z +1] ; //j (dna)
-   // int prev_i = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_i; 
-   // int prev_j = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_j;
-   // int prev_m_no = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_matrix_no;
-   // 
-   // for (int ii=result_length; ii>0; ii--) {
-   //   if ((prev_i == (i-1)) && (prev_j == (j-1))) { // match or mismatch
-       //DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
-       //EST_ARRAY[ii-1] = check_char(est[i-1]) ;
-   //   }
-   //   else if ((prev_i == (i)) && (prev_j == (j-1))) {// gap on EST_ARRAY
-       //DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
-       //EST_ARRAY[ii-1] = 0 ; //gap
-   //   }
-   //   else if ((prev_i == (i-1)) && (prev_j == (j))) { // gap on DNA_ARRAY
-       //DNA_ARRAY[ii-1] = 0 ; //gap
-       //EST_ARRAY[ii-1] = check_char(est[i-1]) ;
-   //   }
-   //   else {// splice site
-       //for (j; j > prev_j; j--) {
-       //  DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
-       //  EST_ARRAY[ii-1] = 6 ; //intron
-       //  ii-- ;
-       //}
-       //ii++ ; // last ii-- too much (because done in for-loop)       
-   //   }
-   //   
-   //   i = prev_i;
-   //   j = prev_j;    
-   //   prev_i = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_i; 
-   //   prev_j = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_j; 
-   //   prev_m_no = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_matrix_no;
-   // }
-   // 
-   } //end of z
-
-   //if(DNA_ARRAY != 0) {
-   // delete[] DNA_ARRAY;
-   // delete[] EST_ARRAY;
-   // }
-
-   for (int i=nr_paths-1;i>=0; i--)
-      delete[] matrices[i];
-}
-
-void Alignment::getAlignmentResults(int* s_align, int* e_align,
-      int* mmatrix_p, double* alignscores, double* qScores) {
-
-   int idx;
-   for(idx=0; idx<splice_align_size; idx++)
-      s_align[idx] = splice_align[idx];
-
-   for(idx=0; idx<est_align_size; idx++)
-      e_align[idx] =  est_align[idx];
-
-   for(idx=0; idx<mmatrix_param_size; idx++)
-      mmatrix_p[idx] = mmatrix_param[idx];
-
-   for(idx=0; idx<alignmentscores_size; idx++)
-      alignscores[idx] = alignmentscores[idx];
-   
-   if (use_quality_scores) {
-      penalty_struct currentPlif;
-      int ctr=0;
-      for (int z=0; z<nr_paths; z++) {
-         for(int estChar=1;estChar<6;estChar++) {
-            for(int dnaChar=0;dnaChar<6;dnaChar++) {
-               int currentPos = (estChar-1)*6+dnaChar;
-               currentPlif = qualityScoresAllPaths[z][currentPos];
-               for(int pidx=0; pidx<currentPlif.len; pidx++) {
-                  qScores[ctr] = currentPlif.penalties[pidx];
-                  ctr++;
-               }
-      }}}
-   }
-
-   //for(idx=0; idx<numPathsPlifs; idx++) {
-   //   currentPlif = qualityScoresAllPaths[idx];
-   //   //printf("Size is %dx%d\n",numPathsPlifs,currentPlif.len);
-   //   for(pidx=0; pidx<currentPlif.len; pidx++) {
-   //      qScores[ctr] = currentPlif.penalties[pidx];
-   //      ctr++;
-   //   }
-   //}
-
-   //printf("\nctr is %d\n",ctr);
-   //printf("Leaving getAlignmentResults...\n");
-}
diff --git a/QPalmaDP/qpalma_dp.h b/QPalmaDP/qpalma_dp.h
deleted file mode 100644 (file)
index 4049221..0000000
+++ /dev/null
@@ -1,133 +0,0 @@
-#ifndef _QPALMA_DP_H_
-#define _QPALMA_DP_H_
-
-#include "penalty_info.h"
-
-struct ArrayElem { //24B
-  double score;
-  int prev_j;
-  int prev_i;
-  int prev_matrix_no;   
-  bool isSplice;
-} ;
-
-struct pre_score { //24B
-  double value; //8
-  int prev_i; //4
-  int prev_j; //4
-  int prev_matrix_no;//4
-  bool issplice;//4
-};
-
-typedef struct pre_score Pre_score;
-
-typedef struct Align_pair { //24B
-  double value ; 
-  bool issplice;
-  int previ,prevj ;
-  char est_char;
-  char dna_char;
-};
-
-void fill_matrix (int nr_paths, Pre_score* matrices[], int len_est, int len_dna, char* est, char* dna, double* prb, penalty_struct* functions, double* matchmatrix, penalty_struct* qualityScores, double* donor, double* acceptor, bool remove_duplicate_scores, int nr_donor_sites, int* donor_sites, int* max_score_positions, mode currentMode);
-
-int check_char(char base);
-void fassert(bool exp,int line, char* file);
-#define FA(expr) (fassert(expr,__LINE__,__FILE__))
-
-bool result_align (Pre_score* matrices[], int matrixnr, int i,  int j, int* result_length_ptr, char* est, char* dna, double* prb, double* chastity,  int* s_align, int* e_align, int* m_param, double* alignmentscores, int* max_score_positions, penalty_struct* qparam, mode currentMode);
-
-extern void print_align(Pre_score* matrix, int length_est,  int length_dna, Align_pair* vektor, int result_length, int print_matrix);
-
-
-/* 
- * The function myalign calculates a new alignment given the scoring scheme
- * Its input arguments are:
- *
- * num_path(id)               ->    an integer specifying which alignment has to be done
- * dna                        ->    
- * est                        ->
- * h                          ->
- * mmatrix                    -> 
- * donor                      ->
- * acceptor                   ->
- * remove_duplicate_scores    -> a boolean
- * print_matrix               -> a boolean
- *
- * [SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest] = myalign_local(...
- *
- * the idea of the qualityScores array is as follows
- *
- * consider a matrix of 24 plifs
- * 
- * -> row major
- *
- */
-
-class Alignment {
-
-   private:
-      int* splice_align;
-      int* est_align;
-      int* mmatrix_param;
-      double* alignmentscores;
-      struct penalty_struct** qualityScoresAllPaths;
-
-      int dna_len;
-      int est_len;
-      int mlen;
-      int nr_paths;
-
-      int numQualSuppPoints;
-      int numPlifs;
-      bool use_quality_scores;
-
-      uint splice_align_size ;
-      uint est_align_size ;
-      uint mmatrix_param_size ;
-      uint alignmentscores_size ;
-      uint numPathsPlifs ;
-
-      INT len;
-      REAL *limits;
-      REAL *penalties;
-      INT max_len;
-      INT min_len;
-      REAL *cache;
-      enum ETransformType transform ;
-      INT id;
-      char * name;
-      INT use_svm;
-
-   public:
-      Alignment(int numQPlifs ,int numq, bool use_qscores);
-      ~Alignment() {
-         if(splice_align != 0)
-            delete[] splice_align;
-
-         if(est_align != 0)
-            delete[] est_align;
-
-         if(mmatrix_param != 0)
-            delete[] mmatrix_param;
-
-         if(alignmentscores != 0)
-            delete[] alignmentscores;
-
-         if(qualityScoresAllPaths != 0)
-            delete[] qualityScoresAllPaths;
-      }
-
-      void myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
-      int est_len_p, double* prb, double* chastity, struct penalty_struct h, 
-      double* matchmatrix, int mm_len, double* donor, int d_len, double* acceptor,
-      int a_len, struct penalty_struct* qualityScores, bool remove_duplicate_scores,
-      bool print_matrix);
-
-      void getDNAEST();
-      void getAlignmentResults(int* s_align, int* e_align,
-      int* mmatrix_p, double* alignscores, double* qScores);
-};
-
-#endif  // _QPALMA_DP_H_
-
diff --git a/QPalmaDP/result_align.cpp b/QPalmaDP/result_align.cpp
deleted file mode 100644 (file)
index d0c6944..0000000
+++ /dev/null
@@ -1,246 +0,0 @@
-#include "assert.h"
-#include "fill_matrix.h"
-#include "debug_tools.h"
-
-using namespace std;
-
-void increaseFeatureCount(penalty_struct* qparam, int dnaChar, int estChar, double estprb) {
-
-   FA(estChar > 0 && estChar < 6);
-   FA(dnaChar > -1 && dnaChar < 6);
-
-   int currentPos = (estChar-1)*6+dnaChar;
-
-   penalty_struct currentStruct = qparam[currentPos];
-
-   //printf("Current index %d est/dna: %d %d score %f\n",currentPos,estChar,dnaChar,estprb);
-
-   //printf("before\n");
-   //int p_idx;
-   //for(p_idx=0;p_idx<currentStruct.len;p_idx++) {
-   //   printf("%f ",currentStruct.limits[p_idx]);
-   //}
-   //printf("\n");
-
-   //for(p_idx=0;p_idx<currentStruct.len;p_idx++) {
-   //   printf("%f ",currentStruct.penalties[p_idx]);
-   //}
-   //printf("\n");
-
-   double value = estprb;
-   int Lower = 0;
-   int idx;
-
-   for (idx=0;idx<currentStruct.len;idx++) {
-      if (currentStruct.limits[idx] <= value)
-         Lower++;
-   }
-
-   if (Lower == 0) {
-         currentStruct.penalties[0] += 1;
-         qparam[currentPos] = currentStruct;
-         return;
-   }
-
-   if (Lower == currentStruct.len) {
-         currentStruct.penalties[currentStruct.len-1] += 1;
-         qparam[currentPos] = currentStruct;
-         return;
-   }
-
-   Lower -= 1;
-   int Upper = Lower+1; // x-werte bleiben fest
-
-   double weightup  = 1.0*(value - currentStruct.limits[Lower]) / (currentStruct.limits[Upper] - currentStruct.limits[Lower]);
-   double weightlow = 1.0*(currentStruct.limits[Upper] - value) / (currentStruct.limits[Upper] - currentStruct.limits[Lower]);
-   currentStruct.penalties[Upper] += weightup;
-   currentStruct.penalties[Lower] += weightlow; 
-
-   //printf("estprb/Lower/Upper %f %d %d\n",estprb,Lower,Upper);
-   //printf("after\n");
-   //for(p_idx=0;p_idx<currentStruct.len;p_idx++) {
-   //   printf("%f ",currentStruct.limits[p_idx]);
-   //}
-   //printf("\n");
-   //for(p_idx=0;p_idx<currentStruct.len;p_idx++)
-   //   printf("%f ",currentStruct.penalties[p_idx]);
-   //printf("\n");
-   qparam[currentPos] = currentStruct;
-}
-
-bool result_align(Pre_score* matrices[], int z, int est_len, int dna_len, int* result_length_ptr, char* est, char* dna, double* prb, double* chastity, int* s_align, int* e_align, int* mparam, double* alignmentscores, int* max_score_positions, penalty_struct* qparam, mode currentMode)
-//Gibt 1 zurueck, wenn kein weiterer Pfad existiert, sonst 0
-
-{  
-  const int mlen=6; // length of matchmatrix
-  int startend[4] ; // [i_start, j_start, i_end, j_end] ;
-  startend[3] = max_score_positions[2*z] ; //i (est)
-  startend[4] = max_score_positions[2*z +1] ; //j (dna)
-  //printf("in result_align: z=%d:(%d, %d)\n", z, startend[3], startend[4]) ;
-  int dnanum ; //using check_char: letter -> number
-  int estnum ; //using check_char: letter -> number
-
-  double prbnum ;
-  double chastitynum ;
-
-  int dna_pos ;
-  int est_pos ;
-  int splice_state ;
-  int est_state ;
-  for (dna_pos=dna_len-1; dna_pos>startend[4]; dna_pos--) {
-    s_align[dna_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
-  }
-  for (est_pos=est_len-1; est_pos>startend[3]; est_pos--) {
-    e_align[est_pos-1] = 4 ;
-  }
-  //splice_align: 0: exon, 1: donor, 2: acceptor, 3: intro,. 4: dangling end
-
-  /* starting point for backtracking */
-  int i = startend[3] ; 
-  int j = startend[4] ;
-  int act_m_no  = z ;
-  alignmentscores[z] = ((Pre_score*)matrices[z] + i*dna_len +j)->value;
-  //printf("alignment_score for z=%d: %3.8f\n", z, alignmentscores[z]) ;
-
-  int prev_i    = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_i; 
-  int prev_j    = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_j;
-  int prev_m_no = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_matrix_no;
-
-  //printf("Current z is %d\n",z);
-
-  if(!(isfinite(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value)))
-  {
-    cout << "No additional path left (inf)!\n";
-    return 1;
-  }
-
-  if(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value==0) {
-    cout << "No additional path left (0)!\n";
-    return 1;
-  }
-
-  assert(dna_pos == j) ;
-  assert(est_pos == i) ;
-
-  splice_state = 0 ; //exon
-  est_state = 1 ; //[112222111 ... for exons of length 2,4,3 ...
-
-  //compute length of alignment (between max(m,n) and m+n)     
-  //local_alignment: backtracking to first Zero: look at actual value
-  while(!(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value)==0.0) {
-    
-    //just some info
-    //if ((z>0) && (!(z==prev_m_no))) {
-    //  printf("(%d,%d): next step in next matrix\n", i,j) ;
-    //}
-
-    if ((prev_i == (i-1)) && (prev_j == (j-1))) { // match or mismatch
-      (*result_length_ptr)= (*result_length_ptr) + 1;
-      
-      s_align[dna_pos-1] = splice_state; 
-      dna_pos-- ;
-      e_align[est_pos-1] = est_state ; //1 or 2, depended
-      est_pos-- ;
-      
-      dnanum = check_char(dna[j-1]) ; //-1 because index starts with 0
-      estnum = check_char(est[i-1]) ; //-1 because index starts with 0
-
-      prbnum = prb[i-1];
-      chastitynum = chastity[i-1];
-
-      //printf("Position %d,%d est,dna: %d,%d\n",i,j,estnum,dnanum);
-
-      if(currentMode == USE_QUALITY_SCORES)
-         increaseFeatureCount(qparam,dnanum,estnum,prbnum);
-      else
-         mparam[mlen*dnanum+estnum] ++ ;
-
-    }
-    else if ((prev_i == (i)) && (prev_j == (j-1))) {// gap on EST
-      (*result_length_ptr)= (*result_length_ptr) + 1;
-      
-      s_align[dna_pos-1] = splice_state; 
-      dna_pos-- ;
-
-      dnanum = check_char(dna[j-1]) ; //-1 because index starts with 0
-      estnum = 0 ; //gap
-
-      if(currentMode == USE_QUALITY_SCORES)
-         mparam[dnanum] ++ ;
-      else
-         mparam[mlen*dnanum +estnum] ++ ;
-
-    }
-    else if ((prev_i == (i-1)) && (prev_j == (j))) { // gap on DNA
-      (*result_length_ptr)= (*result_length_ptr) + 1;
-      
-      e_align[est_pos-1] = est_state ;  //1 or 2, depended
-      est_pos-- ;
-
-      dnanum = 0 ; //gap
-      estnum = check_char(est[i-1]) ; //-1 because index starts with 0
-
-      prbnum = prb[i-1];
-      chastitynum = chastity[i-1];
-
-      //printf("Position %d,%d est,dna: %d,%d\n",i,j,estnum,dnanum);
-
-      if(currentMode == USE_QUALITY_SCORES)
-         increaseFeatureCount(qparam,dnanum,estnum,prbnum);
-      else
-         mparam[mlen*dnanum +estnum] ++ ;
-    }
-    else {// splice site
-      (*result_length_ptr) =  (*result_length_ptr) + (j - prev_j);
-      if (((Pre_score*)matrices[act_m_no] + i*dna_len + j)->issplice==0) {
-       printf("matrices[act_m_no](i,j): %3.15f\n",((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value) ;
-       printf("matrices[prev_m_no](i,j): %3.15f\n",((Pre_score*)matrices[prev_m_no] + i*dna_len +j)->value) ;
-       printf("issplice(act): %d\n",((Pre_score*)matrices[act_m_no] + i*dna_len + j)->issplice) ;
-       printf("issplice(prev): %d\n",((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->issplice) ;
-       printf("act_m_no: %d, prev_m_no: %d\n", act_m_no, prev_m_no) ;
-       printf("j: %d -> prev_j:%d\n", j, prev_j) ;
-       printf("i: %d -> prev_i:%d\n", i, prev_i) ;
-      }
-      if (est_state==1) { //was exon labeled "1"
-       est_state = 2 ; //new exon is labeled "2"
-      }
-      else {
-       est_state = 1 ; //last exon was labeled "2", new exon is labeled "1"
-      }
-      for (j; j > prev_j; j--) {
-       if (splice_state == 0) { //coming from exon
-         splice_state = 2 ; //first intron_state: acceptor
-       } else {
-         splice_state = 3 ; //intron
-       }
-       if (j-1 == prev_j) { //last intron_state: donor
-         splice_state = 1 ;//donor
-       }
-       s_align[dna_pos-1] = splice_state; 
-       dna_pos-- ;
-      }
-      splice_state = 0 ; //exon again
-      
-    }
-   
-    startend[1] = i;
-    startend[2] = j;
-    i = prev_i;
-    j = prev_j;
-    act_m_no = prev_m_no ;
-    
-    prev_i    = ((Pre_score*)matrices[act_m_no] + i*dna_len + j)->prev_i; 
-    prev_j    = ((Pre_score*)matrices[act_m_no] + i*dna_len + j)->prev_j; 
-    prev_m_no = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_matrix_no;
-  }
-  for (dna_pos; dna_pos>0; dna_pos--) {
-    s_align[dna_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
-  }
-  for (est_pos; est_pos>0; est_pos--) {
-    e_align[est_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
-  }
-
-  //printf("(%d, %d) --> (%d, %d)\n", startend[1], startend[2], startend[3], startend[4]) ;
-  //printf("result_length: %d\n", *result_length_ptr) ;
-  return 0;
-}
diff --git a/dyn_prog/Makefile b/dyn_prog/Makefile
new file mode 100644 (file)
index 0000000..0ccad31
--- /dev/null
@@ -0,0 +1,33 @@
+SRCS= Mathmatics.cpp\
+               fill_matrix.cpp\
+               qpalma_dp.cpp\
+               result_align.cpp\
+               penalty_info.cpp\
+               print_align.cpp\
+               io.cpp
+
+HDRS= Mathmatics.h\
+               common.h\
+               config.h\
+               debug_tools.h\
+               fill_matrix.h\
+               io.h\
+               penalty_info.h\
+               qpalma_dp.h
+
+OBJS = $(SRCS:%.cpp=%.o)
+
+#CXXFLAGS=-O3 -fPIC
+#CXXFLAGS=-O3 -fPIC -pg -fprofile-arcs
+CXXFLAGS=-O3 -fPIC -ggdb
+
+IF=QPalmaDP
+
+all: $(OBJS) $(HDRS)
+       swig -c++ -python ${IF}.i
+       g++ $(CXXFLAGS) -I/usr/include/python2.5 -c ${IF}_wrap.cxx -o ${IF}_wrap.o
+       g++ $(CXXFLAGS) -shared -lpython2.5 $(OBJS) ${IF}_wrap.o -o _${IF}.so
+       python -c "import ${IF}"
+
+clean:
+       rm *.o *.so *.cxx ${IF}.py ${IF}.pyc
diff --git a/dyn_prog/Mathmatics.cpp b/dyn_prog/Mathmatics.cpp
new file mode 100644 (file)
index 0000000..61298da
--- /dev/null
@@ -0,0 +1,349 @@
+// Math.cpp: implementation of the CMath class.
+//
+//////////////////////////////////////////////////////////////////////
+
+
+#include "Mathmatics.h"
+#include "io.h"
+
+#include <sys/time.h>
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <time.h>
+#include <unistd.h>
+#include <assert.h>
+
+//////////////////////////////////////////////////////////////////////
+// Construction/Destruction
+//////////////////////////////////////////////////////////////////////
+
+#ifdef USE_LOGCACHE
+//gene/math specific constants
+#ifdef USE_HMMDEBUG
+#define MAX_LOG_TABLE_SIZE 10*1024*1024
+#define LOG_TABLE_PRECISION 1e-6
+#else
+#define MAX_LOG_TABLE_SIZE 123*1024*1024
+#define LOG_TABLE_PRECISION 1e-15
+#endif
+
+INT CMath::LOGACCURACY         = 0; // 100000 steps per integer
+#endif
+
+INT CMath::LOGRANGE            = 0; // range for logtable: log(1+exp(x))  -25 <= x <= 0
+
+#ifdef USE_PATHDEBUG
+const REAL CMath::INFTY            =  1e11;            // infinity
+#else
+const REAL CMath::INFTY            =  -log(0.0);       // infinity
+#endif
+const REAL CMath::ALMOST_NEG_INFTY =  -1000;   
+
+CHAR CMath::rand_state[256];
+
+CMath::CMath()
+{
+       struct timeval tv;
+       gettimeofday(&tv, NULL);
+       UINT seed=(UINT) (4223517*getpid()*tv.tv_sec*tv.tv_usec);
+       initstate(seed, CMath::rand_state, sizeof(CMath::rand_state));
+       CIO::message(M_INFO, "seeding random number generator with %u\n", seed);
+
+#ifdef USE_LOGCACHE
+    LOGRANGE=CMath::determine_logrange();
+    LOGACCURACY=CMath::determine_logaccuracy(LOGRANGE);
+    CIO::message(M_INFO, "Initializing log-table (size=%i*%i*%i=%2.1fMB) ...",LOGRANGE,LOGACCURACY,sizeof(REAL),LOGRANGE*LOGACCURACY*sizeof(REAL)/(1024.0*1024.0)) ;
+   
+    CMath::logtable=new REAL[LOGRANGE*LOGACCURACY];
+    init_log_table();
+    CIO::message(M_INFO, "Done.\n") ;
+#else
+       INT i=0;
+       while ((REAL)log(1+((REAL)exp(-REAL(i)))))
+               i++;
+    CIO::message(M_INFO, "determined range for x in log(1+exp(-x)) is:%d\n", i);
+       LOGRANGE=i;
+#endif 
+}
+
+CMath::~CMath()
+{
+#ifdef USE_LOGCACHE
+       delete[] logtable;
+#endif
+}
+
+#ifdef USE_LOGCACHE
+INT CMath::determine_logrange()
+{
+    INT i;
+    REAL acc=0;
+    for (i=0; i<50; i++)
+    {
+       acc=((REAL)log(1+((REAL)exp(-REAL(i)))));
+       if (acc<=(REAL)LOG_TABLE_PRECISION)
+           break;
+    }
+
+    CIO::message(M_INFO, "determined range for x in table log(1+exp(-x)) is:%d (error:%G)\n",i,acc);
+    return i;
+}
+
+INT CMath::determine_logaccuracy(INT range)
+{
+    range=MAX_LOG_TABLE_SIZE/range/((int)sizeof(REAL));
+    CIO::message(M_INFO, "determined accuracy for x in table log(1+exp(-x)) is:%d (error:%G)\n",range,1.0/(double) range);
+    return range;
+}
+
+//init log table of form log(1+exp(x))
+void CMath::init_log_table()
+{
+  for (INT i=0; i< LOGACCURACY*LOGRANGE; i++)
+    logtable[i]=log(1+exp(REAL(-i)/REAL(LOGACCURACY)));
+}
+#endif
+
+void CMath::sort(INT *a, INT cols, INT sort_col)
+{
+  INT changed=1;
+  if (a[0]==-1) return ;
+  while (changed)
+  {
+      changed=0; INT i=0 ;
+      while ((a[(i+1)*cols]!=-1) && (a[(i+1)*cols+1]!=-1)) // to be sure
+         {
+                 if (a[i*cols+sort_col]>a[(i+1)*cols+sort_col])
+                 {
+                         for (INT j=0; j<cols; j++)
+                                 CMath::swap(a[i*cols+j],a[(i+1)*cols+j]) ;
+                         changed=1 ;
+                 } ;
+                 i++ ;
+         } ;
+  } ;
+} 
+
+void CMath::sort(REAL *a, INT* idx, INT N) 
+{
+
+       INT changed=1;
+       while (changed)
+       {
+               changed=0;
+               for (INT i=0; i<N-1; i++)
+               {
+                       if (a[i]>a[i+1])
+                       {
+                               swap(a[i],a[i+1]) ;
+                               swap(idx[i],idx[i+1]) ;
+                               changed=1 ;
+                       } ;
+               } ;
+       } ;
+        
+} 
+
+
+
+//plot x- axis false positives (fp) 1-Specificity
+//plot y- axis true positives (tp) Sensitivity
+INT CMath::calcroc(REAL* fp, REAL* tp, REAL* output, INT* label, INT& size, INT& possize, INT& negsize, REAL& tresh, FILE* rocfile)
+{
+       INT left=0;
+       INT right=size-1;
+       INT i;
+
+       for (i=0; i<size; i++)
+       {
+               if (!(label[i]== -1 || label[i]==1))
+                       return -1;
+       }
+
+       //sort data such that -1 labels first +1 behind
+       while (left<right)
+       {
+               while ((label[left] < 0) && (left<right))
+                       left++;
+               while ((label[right] > 0) && (left<right))      //warning: label must be either -1 or +1
+                       right--;
+
+               swap(output[left],output[right]);
+               swap(label[left], label[right]);
+       }
+
+       // neg/pos sizes
+       negsize=left;
+       possize=size-left;
+       REAL* negout=output;
+       REAL* posout=&output[left];
+
+       // sort the pos and neg outputs separately
+       qsort(negout, negsize);
+       qsort(posout, possize);
+
+       // set minimum+maximum values for decision-treshhold
+       REAL minimum=min(negout[0], posout[0]);
+       REAL maximum=minimum;
+       if (negsize>0)
+               maximum=max(maximum,negout[negsize-1]);
+       if (possize>0)
+               maximum=max(maximum,posout[possize-1]);
+
+       REAL treshhold=minimum;
+       REAL old_treshhold=treshhold;
+
+       //clear array.
+       for (i=0; i<size; i++)
+       {
+               fp[i]=1.0;
+               tp[i]=1.0;
+       }
+
+       //start with fp=1.0 tp=1.0 which is posidx=0, negidx=0
+       //everything right of {pos,neg}idx is considered to beLONG to +1
+       INT posidx=0;
+       INT negidx=0;
+       INT iteration=1;
+       INT returnidx=-1;
+
+       REAL minerr=10;
+
+       while (iteration < size && treshhold<=maximum)
+       {
+               old_treshhold=treshhold;
+
+               while (treshhold==old_treshhold && treshhold<=maximum)
+               {
+                       if (posidx<possize && negidx<negsize)
+                       {
+                               if (posout[posidx]<negout[negidx])
+                               {
+                                       if (posout[posidx]==treshhold)
+                                               posidx++;
+                                       else
+                                               treshhold=posout[posidx];
+                               }
+                               else
+                               {
+                                       if (negout[negidx]==treshhold)
+                                               negidx++;
+                                       else
+                                               treshhold=negout[negidx];
+                               }
+                       }
+                       else
+                       {
+                               if (posidx>=possize && negidx<negsize-1)
+                               {
+                                       negidx++;
+                                       treshhold=negout[negidx];
+                               }
+                               else if (negidx>=negsize && posidx<possize-1)
+                               {
+                                       posidx++;
+                                       treshhold=posout[posidx];
+                               }
+                               else if (negidx<negsize && treshhold!=negout[negidx])
+                                       treshhold=negout[negidx];
+                               else if (posidx<possize && treshhold!=posout[posidx])
+                                       treshhold=posout[posidx];
+                               else
+                               {
+                                       treshhold=2*(maximum+100); // force termination
+                                       posidx=possize;
+                                       negidx=negsize;
+                                       break;
+                               }
+                       }
+               }
+
+               //calc tp,fp
+               tp[iteration]=(possize-posidx)/(REAL (possize));
+               fp[iteration]=(negsize-negidx)/(REAL (negsize));
+
+               //choose poINT with minimal error
+               if (minerr > negsize*fp[iteration]/size+(1-tp[iteration])*possize/size )
+               {
+                       minerr=negsize*fp[iteration]/size+(1-tp[iteration])*possize/size;
+                       tresh=(old_treshhold+treshhold)/2;
+                       returnidx=iteration;
+               }
+
+               iteration++;
+       }
+
+       // set new size
+       size=iteration;
+
+       if (rocfile)
+       {
+               const CHAR id[]="ROC";
+               fwrite(id, sizeof(char), sizeof(id), rocfile);
+               fwrite(fp, sizeof(REAL), size, rocfile);
+               fwrite(tp, sizeof(REAL), size, rocfile);
+       }
+
+       return returnidx;
+}
+
+UINT CMath::crc32(BYTE *data, INT len)
+{
+    UINT        result;
+    INT                 i,j;
+    BYTE       octet;
+
+    result = 0-1;
+
+    for (i=0; i<len; i++)
+    {
+       octet = *(data++);
+       for (j=0; j<8; j++)
+       {
+           if ((octet >> 7) ^ (result >> 31))
+           {
+               result = (result << 1) ^ 0x04c11db7;
+           }
+           else
+           {
+               result = (result << 1);
+           }
+           octet <<= 1;
+       }
+    }
+
+    return ~result; 
+}
+
+double CMath::mutual_info(REAL* p1, REAL* p2, INT len)
+{
+       double e=0;
+
+       for (INT i=0; i<len; i++)
+               for (INT j=0; j<len; j++)
+                       e+=exp(p2[j*len+i])*(p2[j*len+i]-p1[i]-p1[j]);
+
+       return e;
+}
+
+double CMath::relative_entropy(REAL* p, REAL* q, INT len)
+{
+       double e=0;
+
+       for (INT i=0; i<len; i++)
+               e+=exp(p[i])*(p[i]-q[i]);
+
+       return e;
+}
+
+double CMath::entropy(REAL* p, INT len)
+{
+       double e=0;
+
+       for (INT i=0; i<len; i++)
+               e-=exp(p[i])*p[i];
+
+       return e;
+}
diff --git a/dyn_prog/Mathmatics.h b/dyn_prog/Mathmatics.h
new file mode 100644 (file)
index 0000000..2e24d48
--- /dev/null
@@ -0,0 +1,546 @@
+#ifndef __MATHMATICS_H_
+#define __MATHMATICS_H_
+
+#include "common.h"
+#include "io.h"
+
+#include <math.h>
+#include <stdio.h>
+
+//define finite for win32
+#ifdef _WIN32
+#include <float.h>
+#define finite _finite
+#define isnan _isnan
+#endif
+
+#ifndef NAN
+#include <stdlib.h>
+#define NAN (strtod("NAN",NULL))
+#endif
+
+#ifdef SUNOS
+extern "C" int finite(double);
+#endif
+
+/** Mathematical Functions.
+ * Class which collects generic mathematical functions
+ */
+class CMath  
+{
+public:
+       /**@name Constructor/Destructor.
+        */
+       //@{
+       ///Constructor - initializes log-table
+       CMath();
+
+       ///Destructor - frees logtable
+       virtual ~CMath();
+       //@}
+
+       /**@name min/max/abs functions.
+       */
+       //@{
+
+       ///return the minimum of two integers
+       //
+       template <class T>
+       static inline T min(T a, T b)
+       {
+               return ((a)<=(b))?(a):(b);
+       }
+
+       ///return the maximum of two integers
+       template <class T>
+       static inline T max(T a, T b) 
+       {
+               return ((a)>=(b))?(a):(b);
+       }
+
+       ///return the maximum of two integers
+       template <class T>
+       static inline T abs(T a)
+       {
+               return ((a)>=(0))?(a):(-a);
+       }
+       //@}
+
+       /**@name misc functions */
+       //@{
+
+       /// crc32
+       static UINT crc32(BYTE *data, INT len);
+
+       static inline REAL round(REAL d)
+       {
+               return floor(d+0.5);
+       }
+
+       /// signum of type T variable a 
+       template <class T>
+       static inline REAL sign(REAL a)
+       {
+               if (a==0)
+                       return 0;
+               else return (a<0) ? (-1) : (+1);
+       }
+
+       /// swap floats a and b
+       template <class T>
+       static inline void swap(T & a,T &b)
+       {
+               T c=a ;
+               a=b; b=c ;
+       }
+
+       /// x^2
+       template <class T>
+       static inline T sq(T x)
+       {
+               return x*x;
+       }
+
+       static inline LONG factorial(INT n)
+       {
+               LONG res=1 ;
+               for (int i=2; i<=n; i++)
+                       res*=i ;
+               return res ;
+       }
+
+       static inline LONG nchoosek(INT n, INT k)
+       {
+               long res=1 ;
+
+               for (INT i=n-k+1; i<=n; i++)
+                       res*=i ;
+
+               return res/factorial(k) ;
+       }
+
+       /** performs a bubblesort on a given matrix a.
+        * it is sorted from in ascending order from top to bottom
+        * and left to right */
+       static void sort(INT *a, INT cols, INT sort_col=0) ;
+       static void sort(REAL *a, INT*idx, INT N) ;
+       
+       /** performs a quicksort on an array output of length size
+        * it is sorted from in ascending (for type T) */
+       template <class T>
+       static void qsort(T* output, INT size) ;
+
+       /** performs a quicksort on an array output of length size
+        * it is sorted from in ascending order 
+        * (for type T1) and returns the index (type T2)
+        * matlab alike [sorted,index]=sort(output) 
+        */
+       template <class T1,class T2>
+       static void qsort(T1* output, T2* index, INT size);
+
+       /** performs a quicksort on an array output of length size
+        * it is sorted from in ascending order
+        * (for type T1) and returns the index (type T2)
+        * matlab alike [sorted,index]=sort(output) 
+        */
+       template <class T1,class T2>
+       static void qsort_backward(T1* output, T2* index, INT size);
+
+     /* finds the smallest element in output and puts that element as the 
+               first element  */
+       template <class T>
+       static void min(REAL* output, T* index, INT size);
+
+     /* finds the n smallest elements in output and puts these elements as the 
+               first n elements  */
+       template <class T>
+       static void nmin(REAL* output, T* index, INT size, INT n);
+
+       /* performs a inplace unique of a vector of type T using quicksort 
+        * returns the new number of elements */
+       template <class T>
+       static INT unique(T* output, INT size) 
+       {
+               qsort(output, size) ;
+               INT i,j=0 ;
+               for (i=0; i<size; i++)
+                       if (i==0 || output[i]!=output[i-1])
+                               output[j++]=output[i] ;
+               return j ;
+       }
+
+       /* finds an element in a sorted array via binary search
+     * returns -1 if not found */
+       static inline INT fast_find(WORD* output, INT size, WORD elem)
+       {
+               INT start=0, end=size-1, middle=size/2 ;
+
+               if (output[start]>elem || output[end]<elem)
+                       return -1 ;
+
+               while (1)
+               {
+                       if (output[middle]>elem)
+                       {
+                               end = middle ;
+                               middle=start+(end-start)/2 ;
+                       } ;
+                       if (output[middle]<elem)
+                       {
+                               start = middle ;
+                               middle=start+(end-start)/2 ;
+                       }
+                       if (output[middle]==elem)
+                               return middle ;
+                       if (end-start<=1)
+                       {
+                               if (output[start]==elem)
+                                       return start ;
+                               if (output[end]==elem)
+                                       return end ;
+                               return -1 ;
+                       }
+               }
+       }
+
+       /* finds an element in a sorted array via binary search
+        * that is smaller-equal elem und the next element is larger-equal  */
+       static inline INT fast_find_range(REAL* output, INT size, REAL elem)
+       {
+               INT start=0, end=size-2, middle=(size-2)/2 ;
+
+               if (output[start]>elem)
+                       return -1 ;
+               if (output[end]<=elem)
+                       return size-1 ;
+
+               while (1)
+               {
+                       if (output[middle]>elem)
+                       {
+                               end = middle ;
+                               middle=start+(end-start)/2 ;
+                       } ;
+                       if (output[middle]<elem)
+                       {
+                               start = middle ;
+                               middle=start+(end-start)/2 ;
+                       }
+                       if (output[middle]<=elem && output[middle+1]>=elem)
+                               return middle ;
+                       if (end-start<=1)
+                       {
+                               if (output[start]<=elem && output[start+1]>=elem)
+                                       return start ;
+                               return end ;
+                       }
+               }
+       }
+
+       /** calculates ROC into (fp,tp)
+        * from output and label of length size 
+        * returns index with smallest error=fp+fn
+        */
+       static INT calcroc(REAL* fp, REAL* tp, REAL* output, INT* label, INT& size, INT& possize, INT& negsize, REAL& tresh, FILE* rocfile);
+       //@}
+
+       /// returns the mutual information of p which is given in logspace
+       /// where p,q are given in logspace
+       static double mutual_info(REAL* p1, REAL* p2, INT len);
+
+       /// returns the relative entropy H(P||Q), 
+       /// where p,q are given in logspace
+       static double relative_entropy(REAL* p, REAL* q, INT len);
+
+       /// returns entropy of p which is given in logspace
+       static double entropy(REAL* p, INT len);
+
+       /**@name summing functions */
+       //@{ 
+       /** sum logarithmic probabilities.
+        * Probability measures are summed up but are now given in logspace where
+        * direct summation of exp(operand) is not possible due to numerical problems, i.e. eg. exp(-1000)=0. Therefore
+        * we do log( exp(a) + exp(b)) = a + log (1 + exp (b-a)) where a is max(p,q) and b min(p,q). */
+#ifdef USE_LOGCACHE
+       static inline REAL logarithmic_sum(REAL p, REAL q)
+       {
+               if (finite(p))
+               {
+                       if (finite(q))
+                       {
+
+                               register REAL diff=p-q;
+
+                               if (diff>0)             //p>q
+                               {
+                                       if (diff > LOGRANGE)
+                                               return p;
+                                       else
+                                               return  p + logtable[(int)(diff*LOGACCURACY)];
+                               }
+                               else                    //p<=q
+                               {
+                                       if (-diff > LOGRANGE)
+                                               return  q;
+                                       else
+                                               return  q + logtable[(int)(-diff*LOGACCURACY)];
+                               }
+                       }
+                       CIO::message("INVALID second operand to logsum(%f,%f) expect undefined results\n", p, q);
+                       return NAN;
+               }
+               else 
+                       return q;
+       }
+
+       ///init log table of form log(1+exp(x))
+       static void init_log_table();
+       
+       /// determine INT x for that log(1+exp(-x)) == 0
+       static INT determine_logrange();
+
+       /// determine accuracy, such that the thing fits into MAX_LOG_TABLE_SIZE, needs logrange as argument
+       static INT determine_logaccuracy(INT range);
+#else
+       /*
+       inline REAL logarithmic_sum(REAL p, REAL q)
+       {
+           double result=comp_logarithmic_sum(p,q);
+
+           printf("diff:%f <-> %f\n",p-q, result);
+           return result;
+       }*/
+
+       static inline REAL logarithmic_sum(REAL p, REAL q)
+       {
+               if (finite(p))
+               {
+                       if (finite(q))
+                       {
+
+                               register REAL diff=p-q;
+                               if (diff>0)             //p>q
+                               {
+                                       if (diff > LOGRANGE)
+                                               return p;
+                                       else
+                                               return  p + log(1+exp(-diff));
+                               }
+                               else                    //p<=q
+                               {
+                                       if (-diff > LOGRANGE)
+                                               return  q;
+                                       else
+                                               return  q + log(1+exp(diff));
+                               }
+                       }
+                       return p;
+               }
+               else 
+                       return q;
+       }
+#endif
+#ifdef LOG_SUM_ARRAY
+       /** sum up a whole array of values in logspace.
+        * This function addresses the numeric instabiliy caused by simply summing up N elements by adding 
+        * each of the elements to some variable. Instead array neighbours are summed up until one element remains.
+        * Whilst the number of additions remains the same, the error is only in the order of log(N) instead N.
+        */
+       static inline REAL logarithmic_sum_array(REAL *p, INT len)
+         {
+           if (len<=2)
+             {
+               if (len==2)
+                 return logarithmic_sum(p[0],p[1]) ;
+               if (len==1)
+                 return p[0];
+               return -INFTY ;
+             }
+           else
+             {
+               register REAL *pp=p ;
+               if (len%2==1) pp++ ;
+               for (register INT j=0; j < len>>1; j++)
+                 pp[j]=logarithmic_sum(pp[j<<1], pp[1+(j<<1)]) ;
+             }
+           return logarithmic_sum_array(p,len%2+len>>1) ;
+         } 
+#endif
+       //@}
+public:
+       /**@name constants*/
+       //@{
+       /// infinity
+       static const REAL INFTY;
+       
+       /// almost neg (log) infinity
+       static const REAL ALMOST_NEG_INFTY;
+
+       /// range for logtable: log(1+exp(x))  -LOGRANGE <= x <= 0
+       static INT LOGRANGE;
+       
+#ifdef USE_LOGCACHE    
+       /// number of steps per integer
+       static INT LOGACCURACY;
+       //@}
+protected:
+       ///table with log-values
+       static REAL* logtable;  
+#endif
+       static CHAR rand_state[256];
+};
+
+
+//implementations of template functions
+template <class T>
+void CMath::qsort(T* output, INT size)
+{
+       if (size==2)
+       {
+               if (output[0] > output [1])
+                       swap(output[0],output[1]);
+       }
+       else
+       {
+               REAL split=output[(size*rand())/(RAND_MAX+1)];
+               //REAL split=output[size/2];
+
+               INT left=0;
+               INT right=size-1;
+
+               while (left<=right)
+               {
+                       while (output[left] < split)
+                               left++;
+                       while (output[right] > split)
+                               right--;
+
+                       if (left<=right)
+                       {
+                               swap(output[left],output[right]);
+                               left++;
+                               right--;
+                       }
+               }
+
+               if (right+1> 1)
+                       qsort(output,right+1);
+
+               if (size-left> 1)
+                       qsort(&output[left],size-left);
+       }
+}
+
+template <class T1,class T2>
+void CMath::qsort(T1* output, T2* index, INT size)
+{
+       if (size==2)
+       {
+               if (output[0] > output [1]){
+                       swap(output[0],output[1]);
+                       swap(index[0],index[1]);
+               }
+               
+       }
+       else
+       {
+               REAL split=output[(size*rand())/(RAND_MAX+1)];
+               //REAL split=output[size/2];
+               
+               INT left=0;
+               INT right=size-1;
+               
+               while (left<=right)
+               {
+                       while (output[left] < split)
+                               left++;
+                       while (output[right] > split)
+                               right--;
+                       
+                       if (left<=right)
+                       {
+                               swap(output[left],output[right]);
+                               swap(index[left],index[right]);
+                               left++;
+                               right--;
+                       }
+               }
+               
+               if (right+1> 1)
+                       qsort(output,index,right+1);
+               
+               if (size-left> 1)
+                       qsort(&output[left],&index[left], size-left);
+       }
+}
+
+template <class T1,class T2>
+void CMath::qsort_backward(T1* output, T2* index, INT size)
+{
+       if (size==2)
+       {
+               if (output[0] < output [1]){
+                       swap(output[0],output[1]);
+                       swap(index[0],index[1]);
+               }
+               
+       }
+       else
+       {
+               REAL split=output[(size*rand())/(RAND_MAX+1)];
+               //REAL split=output[size/2];
+               
+               INT left=0;
+               INT right=size-1;
+               
+               while (left<=right)
+               {
+                       while (output[left] > split)
+                               left++;
+                       while (output[right] < split)
+                               right--;
+                       
+                       if (left<=right)
+                       {
+                               swap(output[left],output[right]);
+                               swap(index[left],index[right]);
+                               left++;
+                               right--;
+                       }
+               }
+               
+               if (right+1> 1)
+                       qsort(output,index,right+1);
+               
+               if (size-left> 1)
+                       qsort(&output[left],&index[left], size-left);
+       }
+}
+
+template <class T> 
+void CMath::nmin(REAL* output, T* index, INT size, INT n)
+{
+       if (6*n*size<13*size*log((double)size))
+               for (INT i=0; i<n; i++)
+                       min(&output[i], &index[i], size-i) ;
+       else
+               qsort(output, index, size) ;
+}
+
+template <class T>
+void CMath::min(REAL* output, T* index, INT size)
+{
+       if (size<=0)
+               return ;
+       REAL min_elem = output[0] ;
+       INT min_index = 0 ;
+       for (INT i=1; i<size; i++)
+               if (output[i]<min_elem)
+               {
+                       min_index=i ;
+                       min_elem=output[i] ;
+               }
+       swap(output[0], output[min_index]) ;
+       swap(index[0], index[min_index]) ;
+}
+#endif
diff --git a/dyn_prog/QPalmaDP.i b/dyn_prog/QPalmaDP.i
new file mode 100644 (file)
index 0000000..66ed219
--- /dev/null
@@ -0,0 +1,68 @@
+%module QPalmaDP
+%{
+#define SWIG_FILE_WITH_INIT
+#include "common.h"
+#include "Mathmatics.h"
+#include "penalty_info.h"
+#include "qpalma_dp.h"
+%}
+
+%include "std_vector.i"
+%include "std_string.i"
+
+%include "carrays.i"
+
+%include "common.h"
+%include "Mathmatics.h"
+%include "penalty_info.h"
+%include "qpalma_dp.h"
+
+%array_class(int, intArray) ;
+%array_class(double, doubleArray) ;
+%array_functions(double, doubleFArray) ;
+%array_functions(struct penalty_struct, penaltyArray) ;
+
+/*
+%array_functions(int, intArray) ;
+%array_functions(double, doubleArray) ;
+*/
+%array_class(Pre_score, Pre_scoreArray) ;
+
+%pythoncode
+%{
+
+def createPenaltyArrayFromList(list):
+   array = new_penaltyArray(len(list))
+   for i in range(len(list)):
+      penaltyArray_setitem(array, i, list[i])
+   return array
+
+def createDoubleArrayFromList(list):
+   #array = new_doubleArray(len(list))
+   array = doubleArray(len(list))
+   for i in range(len(list)):
+   #   doubleArray_setitem(array, i, list[i])
+      array[i] = list[i]
+   return array
+
+def createIntArrayFromList(list):
+   #array = new_intArray(len(list))
+   array = intArray(len(list))
+   for i in range(len(list)):
+      #intArray_setitem(array, i, list[i])
+      array[i] = list[i]
+   return array
+
+def createListFromIntArray(array, array_len):
+   list = [0]*array_len
+   for i in range(array_len):
+      list[i] = intArray_getitem(array,i)
+   return list
+
+def createListFromDoubleArray(array, array_len):
+   list = [0]*array_len
+   for i in range(array_len):
+      list[i] = doubleArray_getitem(array,i)
+   return list
+
+%} 
diff --git a/dyn_prog/common.h b/dyn_prog/common.h
new file mode 100644 (file)
index 0000000..61776e9
--- /dev/null
@@ -0,0 +1,160 @@
+#ifndef __COMMON_H__
+#define __COMMON_H__
+
+#include <stdlib.h> 
+#include <stdio.h> 
+#include "config.h"
+
+#ifdef SUNOS
+#define bool int
+#define false 0
+#define true 1
+#endif
+
+#ifndef LINUX
+#define RANDOM_MAX 2147483647
+#else
+#define RANDOM_MAX RAND_MAX
+#endif
+
+/**@name Standard Types 
+ * Definition of Platform independent Types
+*/
+//@{
+
+/// Type CHAR
+typedef char CHAR;
+typedef CHAR* P_CHAR;
+
+/// Type BYTE 
+typedef unsigned char BYTE;
+typedef BYTE* P_BYTE;
+
+/// Type SHORT is 2 bytes in size
+typedef short int SHORT;
+typedef SHORT* P_SHORT;
+
+/// Type WORD is 2 bytes in size
+typedef unsigned short int WORD;
+typedef WORD* P_WORD;
+
+/// Type INT is 4 bytes in size
+typedef int INT;
+typedef INT* P_INT;
+
+/// Type INT is 4 bytes in size
+typedef unsigned int UINT;
+typedef UINT* P_UINT;
+
+/// Type LONG is 8 bytes in size
+typedef long LONG;
+typedef LONG* P_LONG;
+
+/// Type SHORTREAL is 4 bytes in size
+typedef float SHORTREAL;
+typedef SHORTREAL* P_SHORTREAL;
+
+/// Type REAL is 8 bytes in size
+typedef double REAL;
+typedef REAL* P_REAL;
+
+/// Type LONGREAL is 16 bytes in size
+//typedef long double LONGREAL;
+//typedef LONGREAL* P_LONGREAL;
+
+#ifdef USE_SHORTREAL_KERNELCACHE
+       typedef SHORTREAL KERNELCACHE_ELEM;
+#else
+       typedef REAL KERNELCACHE_ELEM;
+#endif
+
+typedef KERNELCACHE_ELEM P_KERNELCACHE_ELEM;
+
+typedef LONG KERNELCACHE_IDX;
+
+/// The io libs output [DEBUG] etc in front of every CIO::message
+/// 'higher' messages filter output depending on the loglevel, i.e. CRITICAL messages
+/// will print all M_CRITICAL TO M_EMERGENCY messages to
+enum EMessageType
+{
+       M_DEBUG,
+       M_INFO,
+       M_NOTICE,
+       M_WARN,
+       M_ERROR,
+       M_CRITICAL,
+       M_ALERT,
+       M_EMERGENCY,
+       M_MESSAGEONLY,
+       M_PROGRESS
+};
+
+enum EKernelType
+{
+       K_UNKNOWN = 0,
+       K_OPTIMIZABLE = 4096,
+       K_LINEAR = 10 | K_OPTIMIZABLE,
+       K_POLY  = 20,
+       K_GAUSSIAN = 30,
+       K_HISTOGRAM = 40,
+       K_SALZBERG = 41,
+       K_LOCALITYIMPROVED = 50,
+       K_SIMPLELOCALITYIMPROVED = 60,
+       K_FIXEDDEGREE = 70,
+       K_WEIGHTEDDEGREE =    80 | K_OPTIMIZABLE,
+       K_WEIGHTEDDEGREEPOS = 81 | K_OPTIMIZABLE,
+       K_WEIGHTEDDEGREEPOLYA =    82,
+       K_WD = 83,
+       K_COMMWORD = 90 | K_OPTIMIZABLE ,
+       K_POLYMATCH = 100,
+       K_ALIGNMENT = 110,
+       K_COMMWORDSTRING = 120 | K_OPTIMIZABLE,
+       K_SPARSENORMSQUARED = 130,
+       K_COMBINED = 140 | K_OPTIMIZABLE
+};
+
+enum EFeatureType
+{
+       F_UNKNOWN = 0,
+       F_REAL = 10,
+       F_SHORT = 20,
+       F_CHAR = 30,
+       F_INT = 40,
+       F_BYTE = 50,
+       F_WORD = 60
+};
+
+enum EFeatureClass
+{
+       C_UNKNOWN = 0,
+       C_SIMPLE = 10,
+       C_SPARSE = 20,
+       C_STRING = 30,
+       C_COMBINED = 40
+};
+
+/// Alphabet of charfeatures/observations
+enum E_ALPHABET
+{
+       /// DNA - letters A,C,G,T,*,N,n
+       DNA=0,
+
+       /// PROTEIN - letters a-z
+       PROTEIN=1,
+
+       /// ALPHANUM - [0-9a-z]
+       ALPHANUM=2,
+
+       /// CUBE - [1-6]
+       CUBE=3,
+
+       /// NONE - type has no alphabet
+       NONE=4
+};
+
+//@}
+
+#define TMP_DIR "/tmp/"
+//#define TMP_DIR "/short/x46/tmp/"
+
+#endif
diff --git a/dyn_prog/config.h b/dyn_prog/config.h
new file mode 100644 (file)
index 0000000..88b448b
--- /dev/null
@@ -0,0 +1,19 @@
+
+#define HAVE_MATLAB 1
+
+#define HAVE_LARGEFILE 1
+
+
+
+
+
+
+#define USE_BIGSTATES 1
+
+
+#define USE_HMMCACHE 1
+
+
+
+
+
diff --git a/dyn_prog/debug_tools.h b/dyn_prog/debug_tools.h
new file mode 100644 (file)
index 0000000..7243830
--- /dev/null
@@ -0,0 +1,15 @@
+#ifndef __DEBUG_TOOLS__
+#define __DEBUG_TOOLS__
+
+#include <cstdio>
+
+void fassert(bool exp,int line, char* file) {
+   if (! exp) {
+      printf("invalid fassert at line %d in file %s!\n",line,file);
+      exit(EXIT_FAILURE);
+   }
+}
+
+#define FA(expr) (fassert(expr,__LINE__,__FILE__))
+
+#endif // __DEBUG_TOOLS__
diff --git a/dyn_prog/fill_matrix.cpp b/dyn_prog/fill_matrix.cpp
new file mode 100644 (file)
index 0000000..1425ade
--- /dev/null
@@ -0,0 +1,485 @@
+/*********************************************************************************************/
+// Version of fill_matrix with fills several alignment matrices (best, second_best ...)
+// nr_paths det. number of these matrices
+//
+// est_len: Number of columns in alignment matrix: length(EST) +1 (for the gap) (variable: i)
+// dna_len: Number of lines in alignment matrix: length(DNA) +1 (for the gap) (variable: j)
+//
+// !!! IMPORTANT !!! The pictures and equations in the paper/poster etc. describe the algorithm
+// for an alignment matrix where DNA and EST changed places !!! IMPORTANT !!!
+/*********************************************************************************************/
+
+/*
+  matchmatrix: columnwise
+    - A C G T N (dna)
+  - x x x x x x
+  A x x x x x x        
+  C x x x x x x
+  G x x x x x x
+  T x x x x x x
+  N x x x x x x
+(est)
+*/
+
+/*
+  alignment matrix: columnwise
+   |-EST . . .
+  -+------------
+  -|00 ...
+  D|0.
+  N|. .
+  A|.  .
+  .|.
+  .|
+  .|
+*/
+
+
+/* TODO
+   - max_len = 10000 for elegans!
+   - do not use double
+*/
+
+#include "fill_matrix.h"
+
+void fassert(bool exp,int line, char* file);
+#define FA(expr) (fassert(expr,__LINE__,__FILE__))
+
+using namespace std;
+
+int check_char(char base)
+{
+  switch(base)
+  {
+  case 'A':
+  case 'a': return 1;
+  case 'C':    
+  case 'c': return 2;
+  case 'G':
+  case 'g': return 3;
+  case 'T':
+  case 't': return 4;
+  case 'N':
+  case 'n': return 5; 
+  //default : return 0;
+  default : return -1;
+  }
+  
+}
+
+ /* system of Palma. Otherwise scoring_functions should point to an array of
+ * plifs scoring the respective pairs of characters together with the EST
+ * quality score.
+ */
+
+double getScore(struct penalty_struct* qualityScores, int mlen, int dnaChar, int estChar, double baseScore) {
+   double score = 0.0;
+   FA(estChar > 0 && estChar < 6);
+   FA(dnaChar > -1 && dnaChar < 6);
+
+   //printf("Starting scoring / scheme is USE_QUALITY_SCORES\n");
+   //printf("estChar,dnaChar are: %d,%d,%d",estChar,dnaChar);
+   
+   int currentPos = (estChar-1)*6+dnaChar;
+   struct penalty_struct currentPen = qualityScores[currentPos];
+
+       INT idx = 0 ;
+       for (INT i=0; i<currentPen.len; i++)
+               if (currentPen.limits[i] <= baseScore)
+                       idx++ ;
+       
+       if (idx==0)
+               score=currentPen.penalties[0] ;
+       else if (idx==currentPen.len)
+               score=currentPen.penalties[currentPen.len-1] ;
+       else
+       {
+               score = (currentPen.penalties[idx]*(baseScore-currentPen.limits[idx-1]) + currentPen.penalties[idx-1]*
+                          (currentPen.limits[idx]-baseScore)) / (currentPen.limits[idx]-currentPen.limits[idx-1]) ;  
+   }
+
+   //score = lookup_penalty(&currentPen,idx, NULL, 0);
+   //if(baseScore <= currentPen.limits[0]) {
+   //   score = currentPen.penalties[0];
+   //} else {
+   //   int sp_idx;
+   //   for(sp_idx=0;sp_idx<currentPen.len;sp_idx++) {
+   //      if(currentPen.limits[sp_idx] <= baseScore )
+   //         score = currentPen.penalties[sp_idx];
+   //   //printf("Current score is %f\n",score);
+   //   }
+   //}
+
+   return score;
+}
+
+void fill_matrix(int nr_paths, Pre_score* matrices[], int est_len /*counter var: i*/, int dna_len /*counter var: j*/, char* est, char* dna, double* prb, penalty_struct* functions , double* matchmatrix, penalty_struct* qualityScores, double* donor, double* acceptor, bool remove_duplicate_scores, int nr_donor_sites, int* donor_sites, int* max_score_positions, mode currentMode)
+{      
+  //printf("Entering fill_matrix...\n");
+
+  /*********************************************************************************************/
+  // variables
+  /*********************************************************************************************/
+  const int mlen = 6; // length of matchmatrix
+  int estChar, dnaChar; // 0:'-', 1:'a', 2:'c', 3:'g', 4:'t', 5:'n'
+
+  double baseScore;
+  double *est_scores = prb ;
+
+  double prevValue ;
+  double tempValue;
+  int max_len = (int)functions->limits[functions->len-1] ; //last (len) supporting point of h
+  //printf("max_len: %d\n", max_len) ;
+
+  int num_elem ; // counter variable
+
+  const int max_num_elem = (4+nr_paths+max_len) * nr_paths ; // upper bound on num_elem
+  double *scores=new double[max_num_elem] ; //are sorted later
+  struct ArrayElem *array=new ArrayElem[max_num_elem] ;
+  //printf("%d\n", max_num_elem*sizeof(ArrayElem)) ;
+  
+  int max_ss_element[nr_paths*nr_paths]; //position on dna with max ss score for introns of length greater than max_len
+  int splice_pos ; //splice position get from donor_sites
+  int start_splice_pos ; //where to start looking in donor_sites
+
+  double max_scores[nr_paths] ;
+
+  /*********************************************************************************************/
+  // Initialisation of all alignment matrices, columnwise
+  /*********************************************************************************************/
+  for (int z=0; z<nr_paths; z++) /* matrice z */
+  {
+    max_scores[z] = log(0.0) ;
+    max_score_positions[2*z]    = 0; // pos i
+    max_score_positions[2*z +1] = 0; // pos j
+
+    for (int i=0; i<est_len; i++) /* i.column */
+    {
+      for (int j=0; j<dna_len; j++) /* j.line */
+      {
+       ((Pre_score*)matrices[z] +(i*dna_len) +j)->value = log(0.0); // -inf
+       ((Pre_score*)matrices[z] +(i*dna_len) +j)->prev_i = 0;
+       ((Pre_score*)matrices[z] +(i*dna_len) +j)->prev_j = 0;
+       ((Pre_score*)matrices[z] +(i*dna_len) +j)->prev_matrix_no = 0;
+       ((Pre_score*)matrices[z] +(i*dna_len) +j)->issplice = 0;
+      }
+    }
+  }
+
+  /*********************************************************************************************/      
+  // Element (0,0) for first/best matrice (z=0)
+  /*********************************************************************************************/       
+  ((Pre_score*)matrices[0])->value = 0;
+  ((Pre_score*)matrices[0])->prev_i = 0;
+  ((Pre_score*)matrices[0])->prev_j = 0;
+  ((Pre_score*)matrices[0])->prev_matrix_no = 0;
+  ((Pre_score*)matrices[0])->issplice = 0;
+
+  /*********************************************************************************************/      
+  // Line Zero for first/best matrice (z=0)
+  /*********************************************************************************************/
+  for (int i=1; i<est_len; i++) /* 0.line, all columns (DNA='-') */
+  {    
+    ((Pre_score*)matrices[0] +(i*dna_len))->value = 0 ;                
+    ((Pre_score*)matrices[0] +(i*dna_len))->prev_i = i-1;
+    ((Pre_score*)matrices[0] +(i*dna_len))->prev_j = 0;    
+    ((Pre_score*)matrices[0] +(i*dna_len))->prev_matrix_no = 0;
+    ((Pre_score*)matrices[0] +(i*dna_len))->issplice = 0;
+  }
+
+
+  /*********************************************************************************************/      
+  // Column Zero for first/best matrice (z=0)
+  /*********************************************************************************************/      
+  for (int j=1; j<dna_len; j++) // 0.column, all lines
+  {      
+    ((Pre_score*)matrices[0] +j)->value =  0 ;
+    ((Pre_score*)matrices[0] +j)->prev_i = 0 ;
+    ((Pre_score*)matrices[0] +j)->prev_j = j-1 ;
+    ((Pre_score*)matrices[0] +j)->prev_matrix_no = 0 ;
+    ((Pre_score*)matrices[0] +j)->issplice = 0;
+  }
+
+   
+  /*********************************************************************************************/      
+  // !!!! Column Zero and line Zero for all other matrices except the best one (z>0): -INF !!!! 
+  /*********************************************************************************************/      
+
+
+   //printf("after initialization\n");
+  
+  /*********************************************************************************************/
+  // all other entries for all matrices, last column and row treated seperately
+  /*********************************************************************************************/
+  
+  /* columnwise, 0.column and 0.line already filled */
+  for (int i=1; i<est_len; i++) /* i.column */
+  {
+    
+    start_splice_pos = 0 ; //where to start looking in donor_sites
+    for (int zz=0; zz<nr_paths*nr_paths; zz++) {max_ss_element[zz]=0 ;} // initializing
+  
+    for (int j=1; j<dna_len; j++) //j.line
+    {
+      dnaChar = check_char(dna[j-1]) ; // -1 because of of the gaps in the 0.line/column of the al-matrix
+      estChar = check_char(est[i-1]) ;
+      baseScore = est_scores[i-1];
+
+      //printf("at iteration %d %d\n",i,j);
+
+      //if introns of length greater than max_len possible
+      if (j > max_len)
+      {
+       //storing highest ss_score (the position) and NOT -INF
+       int new_ss_pos = j-max_len ;
+       double donor_score = donor[new_ss_pos-1] ;
+       if (isfinite(donor_score)) //splice site!
+        {
+         if (max_ss_element[nr_paths-1] != 0)
+          { 
+           for (int zz=0; zz<nr_paths; zz++) //for each alignment matrix
+            {
+             int pos_with_minscore = nr_paths ; //assuming, the new score is the worst score
+             double new_intron_score = donor_score+((Pre_score*)matrices[zz]+(i*dna_len)+new_ss_pos-1)->value ; //could be -INF
+             if (!(isfinite(new_intron_score))) {printf("new_intron_score is -INF\n") ;}
+             for (int pos=0; pos<nr_paths; pos++) //find worst score of all nr_paths+1 scores
+              {
+               int act_splice_position = max_ss_element[zz*nr_paths+pos] ;
+               assert(isfinite(donor[act_splice_position-1])) ;
+               double act_intron_score = donor[act_splice_position-1]+((Pre_score*)matrices[zz]+(i*dna_len)+act_splice_position-1)->value ;
+               if (new_intron_score >= act_intron_score) 
+                {
+                 new_intron_score = act_intron_score ;
+                 pos_with_minscore = pos ;
+               } //else nothing happens, new intron score still the worst one
+             }
+             if (pos_with_minscore != nr_paths)
+              {
+               max_ss_element[zz*nr_paths+pos_with_minscore] = new_ss_pos ; //replacing
+             }
+           }
+         }
+         else
+          {
+           for (int pos=0; pos<nr_paths; pos++)
+            {
+             if (max_ss_element[pos]==0)  //no previous splice site at this pos
+              {
+               for (int zz=0; zz<nr_paths; zz++) {max_ss_element[zz*nr_paths+pos]=j-max_len ;}
+               break ;
+             }
+           } 
+         }
+       }
+       //start_pos in donor_sites so that only introns shorter then max_length are seen
+       if ((start_splice_pos < nr_donor_sites) && donor_sites[start_splice_pos] == j-max_len)
+        {
+         start_splice_pos++ ;
+       }
+      }
+      
+
+      /* Computing all possible scores */
+      num_elem = 0 ; /* number of different scores/possibilities so far */
+      for (int z=0; z<nr_paths; z++) /* matrice z */
+      {
+       Pre_score* actMatrix = (Pre_score*)matrices[z]; /* hm, why?  */
+
+       /***************************************/
+       // 0. Zero (local alignment version)
+        /***************************************/
+       assert(num_elem<max_num_elem) ;
+       array[num_elem].prev_i = 0; 
+       array[num_elem].prev_j = 0; 
+       array[num_elem].prev_matrix_no = z;
+       array[num_elem].isSplice = false; /* no splice site */
+       scores[num_elem] = 0 ;
+       num_elem++ ;
+
+
+        /***************************************/
+       // 1. gap on EST (j-1,i) -> (j,i) 
+        /***************************************/
+       prevValue = ((Pre_score*)actMatrix +(i*dna_len)+j-1)->value ; /* (j-1,i) -> (j,i) */
+       if (isfinite(prevValue))
+       {
+
+      if( currentMode == USE_QUALITY_SCORES ) {
+         tempValue = prevValue + matchmatrix[dnaChar];
+      } else {
+             tempValue = prevValue + matchmatrix[mlen*dnaChar];   /* score(gap,DNA) */
+      }
+
+
+         if (isfinite(tempValue))
+         {
+           assert(num_elem<max_num_elem) ;
+           array[num_elem].prev_i = i; 
+           array[num_elem].prev_j = j-1; /* predecessor */
+           array[num_elem].prev_matrix_no = z;
+           array[num_elem].isSplice = false; /* no splice site */
+           scores[num_elem] = -tempValue ;
+           num_elem++ ;
+         }
+       }
+      
+
+       /***************************************/
+       // 2. match/mismatch (j-1,i-1) -> (j,i)
+       /***************************************/
+       prevValue = ((Pre_score*)actMatrix +(i-1)*dna_len + j-1)->value ; /* (j-1,i-1) -> (j,i) */
+       if (isfinite(prevValue))
+       {
+      if( currentMode == USE_QUALITY_SCORES ) {
+        tempValue = prevValue + getScore(qualityScores,mlen,dnaChar,estChar,baseScore);
+      } else {
+        tempValue = prevValue +(matchmatrix[mlen* dnaChar +estChar]);
+     }
+
+         if (isfinite(tempValue))
+         {
+           assert(num_elem<max_num_elem) ;
+           array[num_elem].prev_i = i-1; /* predecessor */
+           array[num_elem].prev_j = j-1; /* predecessor */
+           array[num_elem].prev_matrix_no = z;
+           array[num_elem].isSplice = false; /* no splice site */
+           scores[num_elem] = -tempValue ;
+           num_elem++ ;
+         }
+       }
+       
+
+       /***************************************/
+       // 3. gap on DNA (j,i-1) -> (j,i)
+       /***************************************/
+       prevValue = ((Pre_score*)actMatrix +(i-1)*dna_len + j)->value ;
+       if (isfinite(prevValue))
+       {
+
+      if( currentMode == USE_QUALITY_SCORES ) {
+        tempValue = prevValue + getScore(qualityScores,mlen,0,estChar,baseScore);
+      } else {
+            tempValue = prevValue +(matchmatrix[estChar]); /* score(EST,gap) */
+     }
+
+         if (isfinite(tempValue))
+         {
+           assert(num_elem<max_num_elem) ;
+           array[num_elem].prev_i = i-1; /* predecessor */
+           array[num_elem].prev_j = j; 
+           array[num_elem].prev_matrix_no = z;
+           array[num_elem].isSplice = false;  /* no splice site */
+           scores[num_elem]=-tempValue ;
+           num_elem++ ;
+         }
+       }
+
+       /***************************************/
+       // 4. all possible splice sites/introns
+       /***************************************/
+       // acceptor[j-1]: g of ag (-1 because cpp starts counting at 0)
+       // donor[splice_pos-1]: g of gt/gc (-1 because cpp starts counting at 0)
+       // donor_sites: the g's of the gt/gc
+       
+       if (isfinite(acceptor[j-1]))
+       {
+         
+         //the nr_paths many introns of length greater than max_len
+         for (int pos=0; pos<nr_paths; pos++)
+          {
+           if (max_ss_element[z*nr_paths+pos]>0)
+            {
+             assert((j-max_ss_element[z*nr_paths+pos]+1) > max_len) ;
+             splice_pos = max_ss_element[z*nr_paths+pos] ; //the g of the gt 
+             prevValue = ((Pre_score*)actMatrix + (i*dna_len) +splice_pos-1)->value ; //one before the _gt
+             if (isfinite(prevValue) && (((Pre_score*)actMatrix + (i*dna_len) +splice_pos-1)->issplice!=1))
+              {
+               int dist = j -(splice_pos-1); //_gt...ag: dist from g to g
+               assert(dist>max_len) ;
+               tempValue = lookup_penalty(functions,dist, NULL, 0) +donor[splice_pos-1] +acceptor[j-1] +prevValue;
+               if (isfinite(tempValue))
+                {
+                 assert(num_elem<max_num_elem) ;
+                 array[num_elem].prev_i = i; 
+                 array[num_elem].prev_j = splice_pos-1; // predecessor
+                 array[num_elem].prev_matrix_no = z;
+                 array[num_elem].isSplice = true; // splice site
+                 scores[num_elem]=-tempValue ;
+                 num_elem++ ;
+               }
+               else if (isfinite(lookup_penalty(functions,dist, NULL, 0)))
+                {
+                 printf("Something wrong with donor_sites(1000er) (intron_length: %d)\n", dist) ;
+                 sleep(5000) ;
+               }
+             }
+           }
+         }
+
+          //all introns of length <= max_len
+          for (int k=start_splice_pos; k < nr_donor_sites; k++) 
+         //for (int k=0; k < nr_donor_sites; k++) //also intron of length greater than 1000
+         {   
+           splice_pos = donor_sites[k] ;
+           if (splice_pos > j-4) { break ;}
+           prevValue = ((Pre_score*)actMatrix + (i*dna_len) +splice_pos-1)->value ; //one before the _gt
+           
+           if (isfinite(prevValue) && (((Pre_score*)actMatrix + (i*dna_len) +splice_pos-1)->issplice!=1))
+           {
+             int dist = j -(splice_pos-1); //_gt...ag: dist from g to g
+             assert(dist<=max_len) ;
+             tempValue = lookup_penalty(functions,dist, NULL, 0) +donor[splice_pos-1] +acceptor[j-1] +prevValue;
+             if (isfinite(tempValue))
+             {
+               assert(num_elem<max_num_elem) ;
+               array[num_elem].prev_i = i; 
+               array[num_elem].prev_j = splice_pos-1; /* predecessor */
+               array[num_elem].prev_matrix_no = z;
+               array[num_elem].isSplice = true; /* splice site */
+               scores[num_elem]=-tempValue ;
+               num_elem++ ;
+             }
+             else if (isfinite(lookup_penalty(functions,dist, NULL, 0)))
+             {
+               printf("Something wrong with donor_sites (intron_length: %d)\n", dist) ;
+               sleep(5000) ;
+             }
+           }
+         }
+       }
+
+      } //end of z
+      
+      CMath::nmin<struct ArrayElem>(scores, array, num_elem, nr_paths);
+      
+      /* the nr_paths many highest scores are written in the nr_paths many matrices*/
+      for (int z=0; z<nr_paths; z++) {
+       if (z>=num_elem) {
+         //nothing happens, entry in matrices[z] at pos. (i,j) still -inf
+       }
+       else {
+         ((Pre_score*)matrices[z] + (i*dna_len) +j)->value = -scores[z] ;
+         ((Pre_score*)matrices[z] + (i*dna_len) +j)->prev_i = array[z].prev_i ;
+         ((Pre_score*)matrices[z] + (i*dna_len) +j)->prev_j = array[z].prev_j ;
+         ((Pre_score*)matrices[z] + (i*dna_len) +j)->issplice = array[z].isSplice ;
+         ((Pre_score*)matrices[z] + (i*dna_len) +j)->prev_matrix_no = array[z].prev_matrix_no;
+
+         if (-scores[z]>=max_scores[z]) { //storing the postion of the highest alignment score yet
+           max_scores[z] = -scores[z] ;
+           max_score_positions[2*z]    = i; // pos i
+           max_score_positions[2*z +1] = j; // pos j
+         }
+       }
+      } //end of z
+
+    } //end of j
+  } // end of i
+
+  //free memory
+  delete[] array;
+  delete[] scores;
+
+  //printf("Leaving fill_matrix...\n");
+  return;
+}
diff --git a/dyn_prog/fill_matrix.h b/dyn_prog/fill_matrix.h
new file mode 100644 (file)
index 0000000..9432c4e
--- /dev/null
@@ -0,0 +1,57 @@
+#ifndef _FILLMATRIX_H__
+#define _FILLMATRIX_H__
+
+#include "Mathmatics.h"
+#include "penalty_info.h"
+#include <algorithm> //Eigentlich in Header
+#include <assert.h>
+#include <ctype.h>
+#include <iostream>
+#include <math.h>
+#include <stdlib.h> 
+#include <string.h>
+
+
+struct ArrayElem { //24B
+  double score;
+  int prev_j;
+  int prev_i;
+  int prev_matrix_no;   
+  bool isSplice;
+} ;
+
+struct pre_score { //24B
+  double value; //8
+  int prev_i; //4
+  int prev_j; //4
+  int prev_matrix_no;//4
+  bool issplice;//4
+};
+
+typedef struct pre_score Pre_score;
+
+typedef struct Align_pair { //24B
+  double value ; 
+  bool issplice;
+  int previ,prevj ;
+  char est_char;
+  char dna_char;
+};
+
+
+void fill_matrix (int nr_paths, Pre_score* matrices[], int len_est, int len_dna, char* est, char* dna, double* prb, penalty_struct* functions, double* matchmatrix, penalty_struct* qualityScores, double* donor, double* acceptor, bool remove_duplicate_scores, int nr_donor_sites, int* donor_sites, int* max_score_positions);
+// funktion aendern in void result_align(Pre_score* matrices[], int anzpath,
+
+int check_char(char base);
+
+bool result_align (Pre_score* matrices[], int matrixnr, int i,  int j, int* result_length_ptr, char* est, char* dna, int* s_align, int* e_align, int* m_param, double* alignmentscores, int* max_score_positions );
+
+extern void print_align(Pre_score* matrix, int length_est,  int length_dna, Align_pair* vektor, int result_length, int print_matrix);
+
+double getScore(double *matchmatrix,int mlen, int dnaChar, int estChar);
+
+
+double scoreMatch(int dnaChar, int estChar, double baseScore);
+
+
+#endif // _FILLMATRIX_H__
diff --git a/dyn_prog/io.cpp b/dyn_prog/io.cpp
new file mode 100644 (file)
index 0000000..5ae3e1c
--- /dev/null
@@ -0,0 +1,96 @@
+#include "io.h"
+#include "common.h"
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <ctype.h>
+
+FILE* CIO::target=stdout;
+
+CIO::CIO()
+{
+}
+
+void CIO::message(EMessageType prio, const CHAR *fmt, ... )
+{
+       check_target();
+       print_message_prio(prio, target);
+    va_list list;
+    va_start(list,fmt);
+    vfprintf(target,fmt,list);
+    va_end(list);
+    fflush(target);
+}
+
+void CIO::buffered_message(EMessageType prio, const CHAR *fmt, ... )
+{
+       check_target();
+       print_message_prio(prio, target);
+    va_list list;
+    va_start(list,fmt);
+    vfprintf(target,fmt,list);
+    va_end(list);
+}
+
+CHAR* CIO::skip_spaces(CHAR* str)
+{
+       INT i=0;
+
+       if (str)
+       {
+               for (i=0; isspace(str[i]); i++);
+
+               return &str[i];
+       }
+       else 
+               return str;
+}
+
+void CIO::set_target(FILE* t)
+{
+       target=t;
+}
+
+void CIO::check_target()
+{
+       if (!target)
+               target=stdout;
+}
+
+void CIO::print_message_prio(EMessageType prio, FILE* target)
+{
+       switch (prio)
+       {
+               case M_DEBUG:
+                       fprintf(target, "[DEBUG] ");
+                       break;
+               case M_PROGRESS:
+                       fprintf(target, "[PROGRESS] ");
+                       break;
+               case M_INFO:
+                       //fprintf(target, "[INFO] ");
+                       break;
+               case M_NOTICE:
+                       fprintf(target, "[NOTICE] ");
+                       break;
+               case M_WARN:
+                       fprintf(target, "[WARN] ");
+                       break;
+               case M_ERROR:
+                       fprintf(target, "[ERROR] ");
+                       break;
+               case M_CRITICAL:
+                       fprintf(target, "[CRITICAL] ");
+                       break;
+               case M_ALERT:
+                       fprintf(target, "[ALERT] ");
+                       break;
+               case M_EMERGENCY:
+                       fprintf(target, "[EMERGENCY] ");
+                       break;
+               case M_MESSAGEONLY:
+                       break;
+               default:
+                       break;
+       }
+}
diff --git a/dyn_prog/io.h b/dyn_prog/io.h
new file mode 100644 (file)
index 0000000..c127f92
--- /dev/null
@@ -0,0 +1,33 @@
+#ifndef __CIO_H__
+#define __CIO_H__
+
+#include "common.h"
+
+#include <stdio.h>
+#include <stdarg.h>
+
+class CIO
+{
+public:
+       CIO();
+
+       static void set_target(FILE* target);
+       static void message(EMessageType prio, const CHAR *fmt, ... );
+
+       inline static void not_implemented() 
+       {
+               message(M_ERROR, "Sorry, not yet implemented\n");
+       }
+
+       static void buffered_message(EMessageType prio, const CHAR *fmt, ... );
+
+       static CHAR* skip_spaces(CHAR* str);
+
+protected:
+       static void check_target();
+       static void print_message_prio(EMessageType prio, FILE* target);
+
+protected:
+       static FILE* target;
+};
+#endif
diff --git a/dyn_prog/penalty_info.cpp b/dyn_prog/penalty_info.cpp
new file mode 100644 (file)
index 0000000..d5d739f
--- /dev/null
@@ -0,0 +1,368 @@
+#include <assert.h>
+#include "config.h"
+//#include "features/CharFeatures.h"
+//#include "features/StringFeatures.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include "io.h"
+
+
+#include "fill_matrix.h"
+#include "penalty_info.h"
+
+void init_penalty_struct(struct penalty_struct &PEN)
+{
+       PEN.limits=NULL ;
+       PEN.penalties=NULL ;
+       PEN.id=-1 ;
+       PEN.next_pen=NULL ;
+       PEN.transform = T_LINEAR ;
+       PEN.name = NULL ;
+       PEN.max_len=0 ;
+       PEN.min_len=0 ;
+       PEN.cache=NULL ;
+       PEN.use_svm=0 ;
+}
+
+void init_penalty_struct_cache(struct penalty_struct &PEN)
+{
+       if (PEN.cache || PEN.use_svm)
+               return ;
+               
+       REAL* cache=new REAL[PEN.max_len+1] ;
+       if (cache)
+       {
+               for (INT i=0; i<=PEN.max_len; i++)
+                       cache[i] = lookup_penalty(&PEN, i, 0, false) ;
+               PEN.cache = cache ;
+       }
+}
+
+void delete_penalty_struct_palma(struct penalty_struct &PEN)
+{
+       if (PEN.id!=-1)
+       {
+               delete[] PEN.limits ;
+               delete[] PEN.penalties ;
+               delete[] PEN.name ;
+               delete[] PEN.cache ;
+       }
+}
+
+void delete_penalty_struct_array(struct penalty_struct *PEN, INT len)
+{
+       for (int i=0; i<len; i++)
+               delete_penalty_struct_palma(PEN[i]) ;
+       delete[] PEN ;
+}
+
+void copy_penalty_struct(struct penalty_struct *old, struct penalty_struct *newp) {
+   newp->len       = old->len;
+   newp->limits    = old->limits;
+   newp->penalties = old->penalties;
+   newp->max_len   = old->max_len;
+   newp->min_len   = old->min_len;
+   newp->cache     = old->cache;
+   newp->transform = old->transform;
+   newp->id        = old->id;
+   newp->next_pen  = old->next_pen;
+   newp->name      = old->name;
+   newp->use_svm   = old->use_svm;
+}
+
+
+//struct penalty_struct * read_penalty_struct_from_cell(const mxArray * mx_penalty_info, int &P)
+//{
+//     P = mxGetN(mx_penalty_info) ;
+//     
+//     struct penalty_struct * PEN = new struct penalty_struct[P] ;
+//     for (INT i=0; i<P; i++)
+//              init_penalty_struct(PEN[i]) ;
+//     
+//     for (INT i=0; i<P; i++)
+//     {
+//       const mxArray* mx_elem = mxGetCell(mx_penalty_info, i) ;
+//       if (mx_elem==NULL || !mxIsStruct(mx_elem))
+//         {
+//           CIO::message(M_ERROR, "empty cell element\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         } ;
+//       const mxArray* mx_id_field = mxGetField(mx_elem, 0, "id") ;
+//       if (mx_id_field==NULL || !mxIsNumeric(mx_id_field) || 
+//           mxGetN(mx_id_field)!=1 || mxGetM(mx_id_field)!=1)
+//         {
+//           CIO::message(M_ERROR, "missing id field\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         }
+//       const mxArray* mx_limits_field = mxGetField(mx_elem, 0, "limits") ;
+//       if (mx_limits_field==NULL || !mxIsNumeric(mx_limits_field) ||
+//           mxGetM(mx_limits_field)!=1)
+//         {
+//           CIO::message(M_ERROR, "missing limits field\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         }
+//       INT len = mxGetN(mx_limits_field) ;
+//       
+//       const mxArray* mx_penalties_field = mxGetField(mx_elem, 0, "penalties") ;
+//       if (mx_penalties_field==NULL || !mxIsNumeric(mx_penalties_field) ||
+//           mxGetM(mx_penalties_field)!=1 || mxGetN(mx_penalties_field)!=len)
+//         {
+//           CIO::message(M_ERROR, "missing penalties field\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         }
+//       const mxArray* mx_transform_field = mxGetField(mx_elem, 0, "transform") ;
+//       if (mx_transform_field==NULL || !mxIsChar(mx_transform_field))
+//         {
+//           CIO::message(M_ERROR, "missing transform field\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         }
+//       const mxArray* mx_name_field = mxGetField(mx_elem, 0, "name") ;
+//       if (mx_name_field==NULL || !mxIsChar(mx_name_field))
+//         {
+//           CIO::message(M_ERROR, "missing name field\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         }
+//       const mxArray* mx_max_len_field = mxGetField(mx_elem, 0, "max_len") ;
+//       if (mx_max_len_field==NULL || !mxIsNumeric(mx_max_len_field) ||
+//           mxGetM(mx_max_len_field)!=1 || mxGetN(mx_max_len_field)!=1)
+//         {
+//           CIO::message(M_ERROR, "missing max_len field\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         }
+//       const mxArray* mx_min_len_field = mxGetField(mx_elem, 0, "min_len") ;
+//       if (mx_min_len_field==NULL || !mxIsNumeric(mx_min_len_field) ||
+//           mxGetM(mx_min_len_field)!=1 || mxGetN(mx_min_len_field)!=1)
+//         {
+//           CIO::message(M_ERROR, "missing min_len field\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         }
+//       const mxArray* mx_use_svm_field = mxGetField(mx_elem, 0, "use_svm") ;
+//       if (mx_use_svm_field==NULL || !mxIsNumeric(mx_use_svm_field) ||
+//           mxGetM(mx_use_svm_field)!=1 || mxGetN(mx_use_svm_field)!=1)
+//         {
+//           CIO::message(M_ERROR, "missing use_svm field\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         }
+//       INT use_svm = (INT) mxGetScalar(mx_use_svm_field) ;
+//             //fprintf(stderr, "use_svm_field=%i\n", use_svm) ;
+//       
+//       const mxArray* mx_next_id_field = mxGetField(mx_elem, 0, "next_id") ;
+//       if (mx_next_id_field==NULL || !mxIsNumeric(mx_next_id_field) ||
+//           mxGetM(mx_next_id_field)!=1 || mxGetN(mx_next_id_field)!=1)
+//         {
+//           CIO::message(M_ERROR, "missing next_id field\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         }
+//       INT next_id = (INT) mxGetScalar(mx_next_id_field)-1 ;
+//       
+//       INT id = (INT) mxGetScalar(mx_id_field)-1 ;
+//       if (i<0 || i>P-1)
+//         {
+//           CIO::message(M_ERROR, "id out of range\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         }
+//       INT max_len = (INT) mxGetScalar(mx_max_len_field) ;
+//       if (max_len<0 || max_len>1024*1024*100)
+//         {
+//           CIO::message(M_ERROR, "max_len out of range\n") ;
+//                     delete_penalty_struct_array(PEN,P) ;
+//                     return NULL ;
+//         }
+//       PEN[id].max_len = max_len ;
+//
+//       INT min_len = (INT) mxGetScalar(mx_min_len_field) ;
+//       if (min_len<0 || min_len>1024*1024*100)
+//         {
+//           CIO::message(M_ERROR, "min_len out of range\n") ;
+//                     delete_penalty_struct_array(PEN,P) ;
+//                     return NULL ;
+//         }
+//       PEN[id].min_len = min_len ;
+//       
+//       if (PEN[id].id!=-1)
+//         {
+//           CIO::message(M_ERROR, "penalty id already used\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         }
+//       PEN[id].id=id ;
+//       if (next_id>=0)
+//         PEN[id].next_pen=&PEN[next_id] ;
+//       //fprintf(stderr,"id=%i, next_id=%i\n", id, next_id) ;
+//       
+//       assert(next_id!=id) ;
+//       PEN[id].use_svm=use_svm ;
+//       PEN[id].limits = new REAL[len] ;
+//       PEN[id].penalties = new REAL[len] ;
+//       double * limits = mxGetPr(mx_limits_field) ;
+//       double * penalties = mxGetPr(mx_penalties_field) ;
+//       
+//       for (INT i=0; i<len; i++)
+//         {
+//           PEN[id].limits[i]=limits[i] ;
+//           PEN[id].penalties[i]=penalties[i] ;
+//         }
+//       PEN[id].len = len ;
+//       
+//       char *transform_str = mxArrayToString(mx_transform_field) ;                           
+//       char *name_str = mxArrayToString(mx_name_field) ;                             
+//       
+//       if (strcmp(transform_str, "log")==0)
+//         PEN[id].transform = T_LOG ;
+//       else if (strcmp(transform_str, "log(+1)")==0)
+//         PEN[id].transform = T_LOG_PLUS1 ;   
+//       else if (strcmp(transform_str, "log(+3)")==0)
+//         PEN[id].transform = T_LOG_PLUS3 ;   
+//       else if (strcmp(transform_str, "(+3)")==0)
+//         PEN[id].transform = T_LINEAR_PLUS3 ;        
+//       else if (strcmp(transform_str, "")==0)
+//         PEN[id].transform = T_LINEAR ;      
+//       else
+//         {
+//           delete_penalty_struct_array(PEN,P) ;
+//           mxFree(transform_str) ;
+//           return NULL ;
+//         }
+//       PEN[id].name = new char[strlen(name_str)+1] ;
+//       strcpy(PEN[id].name, name_str) ;
+//       
+//       init_penalty_struct_cache(PEN[id]) ;
+//         
+//       mxFree(transform_str) ;
+//       mxFree(name_str) ;
+//     }
+//     return PEN ;
+//}
+
+REAL lookup_penalty_svm(const struct penalty_struct *PEN, INT p_value, REAL *d_values)
+{      
+       if (PEN==NULL)
+               return 0 ;
+       assert(PEN->use_svm>0) ;
+       REAL d_value=d_values[PEN->use_svm-1] ;
+       //fprintf(stderr,"transform=%i, d_value=%1.2f\n", (INT)PEN->transform, d_value) ;
+       
+       switch (PEN->transform)
+       {
+       case T_LINEAR:
+               break ;
+       case T_LOG:
+               d_value = log(d_value) ;
+               break ;
+       case T_LOG_PLUS1:
+               d_value = log(d_value+1) ;
+               break ;
+       case T_LOG_PLUS3:
+               d_value = log(d_value+3) ;
+               break ;
+       case T_LINEAR_PLUS3:
+               d_value = d_value+3 ;
+               break ;
+       default:
+               CIO::message(M_ERROR, "unknown transform\n") ;
+               break ;
+       }
+       
+       INT idx = 0 ;
+       REAL ret ;
+       for (INT i=0; i<PEN->len; i++)
+               if (PEN->limits[i]<=d_value)
+                       idx++ ;
+       
+       if (idx==0)
+               ret=PEN->penalties[0] ;
+       else if (idx==PEN->len)
+               ret=PEN->penalties[PEN->len-1] ;
+       else
+       {
+               ret = (PEN->penalties[idx]*(d_value-PEN->limits[idx-1]) + PEN->penalties[idx-1]*
+                          (PEN->limits[idx]-d_value)) / (PEN->limits[idx]-PEN->limits[idx-1]) ;  
+       }
+       
+       //fprintf(stderr,"ret=%1.2f\n", ret) ;
+
+       if (PEN->next_pen)
+               ret+=lookup_penalty(PEN->next_pen, p_value, d_values);
+       
+       //fprintf(stderr,"ret=%1.2f\n", ret) ;
+       
+       return ret ;
+}
+
+REAL lookup_penalty(const struct penalty_struct *PEN, INT p_value, 
+                                       REAL* svm_values, bool follow_next)
+{      
+       if (PEN==NULL)
+               return 0 ;
+       if (PEN->use_svm)
+               return lookup_penalty_svm(PEN, p_value, svm_values) ;
+               
+       if ((p_value<PEN->min_len) || (p_value>PEN->max_len))
+               return -CMath::INFTY ;
+       
+       if (PEN->cache!=NULL && (p_value>=0) && (p_value<=PEN->max_len))
+       {
+               REAL ret=PEN->cache[p_value] ;
+               if (PEN->next_pen && follow_next)
+                       ret+=lookup_penalty(PEN->next_pen, p_value, svm_values);
+               return ret ;
+       }
+       
+       REAL d_value = (REAL) p_value ;
+       switch (PEN->transform)
+       {
+       case T_LINEAR:
+               break ;
+       case T_LOG:
+               d_value = log(d_value) ;
+               break ;
+       case T_LOG_PLUS1:
+               d_value = log(d_value+1) ;
+               break ;
+       case T_LOG_PLUS3:
+               d_value = log(d_value+3) ;
+               break ;
+       case T_LINEAR_PLUS3:
+               d_value = d_value+3 ;
+               break ;
+       default:
+               CIO::message(M_ERROR, "unknown transform\n") ;
+               break ;
+       }
+
+       INT idx = 0 ;
+       REAL ret ;
+       for (INT i=0; i<PEN->len; i++)
+               if (PEN->limits[i]<=d_value)
+                       idx++ ;
+       
+       if (idx==0)
+               ret=PEN->penalties[0] ;
+       else if (idx==PEN->len)
+               ret=PEN->penalties[PEN->len-1] ;
+       else
+       {
+               ret = (PEN->penalties[idx]*(d_value-PEN->limits[idx-1]) + PEN->penalties[idx-1]*
+                          (PEN->limits[idx]-d_value)) / (PEN->limits[idx]-PEN->limits[idx-1]) ;  
+       }
+       //if (p_value>=30 && p_value<150)
+       //fprintf(stderr, "%s %i(%i) -> %1.2f\n", PEN->name, p_value, idx, ret) ;
+       
+       if (PEN->next_pen && follow_next)
+               ret+=lookup_penalty(PEN->next_pen, p_value, svm_values);
+
+       return ret ;
+}
diff --git a/dyn_prog/penalty_info.h b/dyn_prog/penalty_info.h
new file mode 100644 (file)
index 0000000..30de4aa
--- /dev/null
@@ -0,0 +1,45 @@
+#ifndef __PENALTY_INFO_H__
+#define __PENALTY_INFO_H__
+
+#include "common.h"
+#include "Mathmatics.h"
+
+enum ETransformType
+{
+       T_LINEAR,
+       T_LOG,
+       T_LOG_PLUS1,
+       T_LOG_PLUS3,
+       T_LINEAR_PLUS3
+}  ;
+
+struct penalty_struct
+{
+       INT len ;
+       REAL *limits ;
+       REAL *penalties ;
+       INT max_len ;
+       INT min_len ;
+       REAL *cache ;
+       enum ETransformType transform ;
+       INT id ;
+       struct penalty_struct *next_pen ;
+       char * name ;
+       INT use_svm ;
+} ;
+
+void init_penalty_struct(struct penalty_struct &PEN) ;
+void delete_penalty_struct_palma(struct penalty_struct &PEN) ;
+//void delete_penalty_struct_array(struct penalty_struct *PEN, INT len) ;
+void copy_penalty_struct(struct penalty_struct *old, struct penalty_struct *newp);
+
+//#ifdef HAVE_MATLAB
+//struct penalty_struct * read_penalty_struct_from_cell(const mxArray * mx_penalty_info, mwSize &P) ;
+//#endif
+
+REAL lookup_penalty(const struct penalty_struct *PEN, INT p_value, 
+                                       REAL* svm_values, bool follow_next=true) ;
+
+enum mode { NORMAL, USE_QUALITY_SCORES};
+
+#endif
diff --git a/dyn_prog/print_align.cpp b/dyn_prog/print_align.cpp
new file mode 100644 (file)
index 0000000..312cabc
--- /dev/null
@@ -0,0 +1,97 @@
+#include "fill_matrix.h"
+using namespace std;
+
+void print_align(Pre_score* matrix, int length_est,  int length_dna,Align_pair* vektor, int result_length, int print_matrix)
+  
+{
+  //Druck der matrix
+  switch(print_matrix)
+    {
+    case 1:
+      {
+       cout <<"Matrix: \n";
+       
+       for (int k = 0; k<length_est; k++)//k Zeile, l Spalte
+         {
+           cout << "Zeile " << k << ":\t"; 
+           for (int l= 0; l<length_dna; l++)
+             {
+               cout << ((Pre_score*)matrix + (k*length_dna) + l)->value <<"\t";  
+             } 
+           cout <<"\n";
+         }
+       break;  
+      }
+      
+    case 2:
+      {
+       cout <<"Matrix: \n";
+       
+       for (int k = 0; k<length_est; k++)//k Zeile, l Spalte
+         {
+           cout << "Zeile " << k << ":\t"; 
+           for (int l= 0; l<length_dna; l++)
+             {
+               if (((((Pre_score*)matrix + (k*length_dna) + l)->prev_i) == k-1) && ((((Pre_score*)matrix + (k*length_dna) + l)->prev_j) == l-1))
+                 cout << "\\" <<((Pre_score*)matrix + (k*length_dna) + l)->value <<"\t";  
+               else if (((((Pre_score*)matrix + (k*length_dna) + l)->prev_i) == k) && ((((Pre_score*)matrix + (k*length_dna) + l)->prev_j) == l-1))
+                 cout << "<"<< ((Pre_score*)matrix + (k*length_dna) + l)->value <<"\t";  
+               else if (((((Pre_score*)matrix + (k*length_dna) + l)->prev_i) == k -1) && ((((Pre_score*)matrix + (k*length_dna) + l)->prev_j) == l))
+                 cout << "|"<< ((Pre_score*)matrix + (k*length_dna) + l)->value <<"\t";  
+               else             { cout << ((Pre_score*)matrix + (k*length_dna) + l)->value << "("; 
+               cout << ((Pre_score*)matrix + (k*length_dna) + l)->prev_i << "," << ((Pre_score*)matrix + (k*length_dna) + l)->prev_j << ")\t";  
+               }
+               
+             } 
+           cout <<"\n";
+         }
+        
+       cout << "length_est" << length_est << "\n";
+        cout << "length_dna" << length_dna << "\n";
+        cout << "result_length: " << result_length << "\n";
+       
+       
+       break;  
+      } 
+      
+    case 3:
+      {
+       //Druck des Alignment
+       
+       // cout << "Alignment\n" ;
+       int j = 0;
+       // cout << "resultlength: " << result_length <<"\n";
+       while (j<result_length) {
+         int pacman = j;       
+         cout << "D:(" <<j<<")\t";
+         for (int i=0; ((j<result_length) && (i<50)); i++)
+           {
+             cout << vektor[j].dna_char << " ";        
+             j++;      
+           }
+         
+         cout << "\n"; 
+         
+         cout << "E:(" <<pacman<<")\t";
+         for (int i=0;((pacman<result_length) && (i<50)); i++)
+           {
+             cout << vektor[pacman].est_char << " "; 
+             pacman++;
+           }
+         cout << "\n\n";       
+       }               
+       
+       //Ende Druck Alignment
+       break ;
+      }
+      
+    default:
+      {
+       cout <<"";
+      }
+    }
+  
+  
+  
+}
+
diff --git a/dyn_prog/qpalma_dp.cpp b/dyn_prog/qpalma_dp.cpp
new file mode 100644 (file)
index 0000000..7786745
--- /dev/null
@@ -0,0 +1,277 @@
+#include "qpalma_dp.h"
+#include <cstring>
+
+using namespace std;
+
+/*[splice_align, est_align, weightMatch, alignmentscores, dnaest] = ...
+  myalign([nr_paths], dna, est, {h}, matchmatrix, donor, acceptor, remove_duplicate_scores, ...
+  print_matrix) */
+
+Alignment::Alignment(int numQPlifs, int numq, bool use_qscores) {
+      len = 0;
+      limits = 0;
+      penalties = 0;
+      max_len = 0;
+      min_len = 0;
+      cache = 0;
+      enum ETransformType transform = T_LINEAR;
+      id = 0;
+      name = 0;
+      use_svm = 0;
+
+      // set ptrs to zero first
+      splice_align         = 0;
+      est_align            = 0;
+      mmatrix_param        = 0;
+      alignmentscores      = 0;
+      qualityScoresAllPaths = 0;
+      mlen = 6; // score matrix: length of 6 for "- A C G T N"
+
+      numQualSuppPoints = numq;
+      numPlifs = numQPlifs;
+      use_quality_scores = use_qscores;
+
+      //printf("number of support points: %d\n",numQualSuppPoints);
+      //printf("number of plifs: %d\n",numPlifs );
+      FA( numQualSuppPoints >= 0 );
+      FA( numPlifs >= 0 );
+}
+
+void Alignment::getDNAEST(){}
+
+void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
+      int est_len_p, double* prb, double* chastity, struct penalty_struct h, double* matchmatrix, int mm_len,
+      double* donor, int d_len, double* acceptor, int a_len, struct penalty_struct* qualityScores, 
+      bool remove_duplicate_scores, bool print_matrix) {
+
+   // printf("Entering myalign...\n");
+   dna_len =  dna_len_p + 1 ;
+   est_len =  est_len_p + 1 ;
+   nr_paths = nr_paths_p;
+
+  /***************************************************************************/ 
+  // initialize alignment matrices and call fill_matrix()  
+  /***************************************************************************/
+
+  mode currentMode;
+  if (use_quality_scores) 
+   currentMode = USE_QUALITY_SCORES;
+  else
+   currentMode = NORMAL;
+
+    // dnaest
+   double *DNA_ARRAY = 0;
+   double *EST_ARRAY = 0;
+  
+  //possible donor positions
+  int nr_donor_sites = 0 ;
+  for (int ii=0; ii<d_len; ii++) {
+    if(isfinite(donor[ii])) {
+      nr_donor_sites++ ;
+    }
+  }
+
+  int* donor_sites = new int[nr_donor_sites];
+  int donor_idx = 0 ;
+  for (int ii=0; ii<d_len; ii++) {
+    if(isfinite(donor[ii])) {
+      donor_sites[donor_idx] = ii+1 ;
+      donor_idx++ ;
+    }  
+  }
+
+  int* max_score_positions = new int[nr_paths*2];
+
+  Pre_score** matrices = new Pre_score*[nr_paths];
+  for (int z=0; z<nr_paths; z++) {
+      matrices[z] = new Pre_score[dna_len * est_len];
+  }
+
+   //printf("qualityScores\n");
+   //for(int qidx=0;qidx<numPlifs;qidx++) {
+   //   penalty_struct p; 
+   //   p = qualityScores[qidx];
+   //   printf("%d ",qidx);
+   //   for(int pidx=0;pidx<p.len;pidx++)
+   //      printf("%f ",pidx,p.penalties[pidx]);
+   //   printf("\n");
+   //}
+  
+  //printf("calling fill_matrix...\n");
+  fill_matrix(nr_paths, matrices, est_len, dna_len, est, dna, prb, &h, matchmatrix, qualityScores, donor, acceptor, remove_duplicate_scores, nr_donor_sites, donor_sites, max_score_positions,currentMode);
+  //printf("after call to fill_matrix...\n");
+
+  /***************************************************************************/ 
+  // return arguments etc.
+  /***************************************************************************/
+  int result_length; //Eine Variable fuer alle Matrizen
+
+   splice_align_size = (dna_len-1)*nr_paths;
+   est_align_size = (est_len-1)*nr_paths;
+
+   int mmatrix_size;
+
+   if (currentMode == USE_QUALITY_SCORES) {
+      mmatrix_param_size = mlen*nr_paths;
+      mmatrix_size = mlen;
+   }
+
+   if (currentMode == NORMAL) {
+      mmatrix_param_size = (mlen*mlen)*nr_paths;
+      mmatrix_size = mlen*mlen;
+   }
+
+   alignmentscores_size = nr_paths; //alignment score for each path/matrix
+   numPathsPlifs = numPlifs*nr_paths; //alignment score for each path/matrix
+
+   splice_align         = new int[splice_align_size];
+   est_align            = new int[est_align_size];
+   mmatrix_param        = new int[mmatrix_param_size];
+   alignmentscores      = new double[nr_paths]; //alignment score for each path/matrix
+
+
+  // printf("before memset...\n");
+  memset((char*)splice_align, -1, (dna_len-1)*nr_paths*sizeof(int)); // fills splice_align with zeros
+  memset((char*)est_align, -1, (est_len-1)*nr_paths*sizeof(int)); // fills est_align with zeros
+  memset((char*)mmatrix_param, 0, mmatrix_size*nr_paths*sizeof(int)); //fills mmatrix_param with zeros
+  memset(alignmentscores, -1, nr_paths*sizeof(double)); //fills alignmentscores with zeros
+  //printf("after memset...\n");
+  
+  qualityScoresAllPaths= new penalty_struct*[nr_paths];
+
+  for (int z=0; z<nr_paths; z++) {
+    result_length = 0 ;
+    
+    int* s_align = splice_align + (dna_len-1)*z;  //pointer
+    int* e_align = est_align + (est_len-1)*z ;    //pointer
+    int* mparam  = mmatrix_param + mmatrix_size*z; //pointer
+
+   qualityScoresAllPaths[z] = new penalty_struct[numPlifs];
+
+   int qidx, pidx;
+   for(qidx=0;qidx<numPlifs;qidx++) {
+      penalty_struct p;
+      p.len = numQualSuppPoints;
+      p.limits = (REAL*) calloc(p.len,sizeof(REAL));
+
+      for(pidx=0;pidx<p.len;pidx++)
+         p.limits[pidx] = qualityScores[qidx%numPlifs].limits[pidx];
+
+      p.penalties = (REAL*) calloc(p.len,sizeof(REAL));
+      qualityScoresAllPaths[z][qidx] = p;
+   }
+
+   //penalty_struct* qparam = qualityScoresAllPaths + (numPlifs*z);
+
+    //printf("before call to result_align...\n");
+    bool no_more_path = result_align(matrices, z, est_len, dna_len, &result_length, est, dna, prb, chastity, s_align, e_align, mparam, alignmentscores, max_score_positions, qualityScoresAllPaths[z] , currentMode);
+    //printf("after call to result_align...\n");
+
+   //printf("z is %d\n",z);
+   //for(qidx=0;qidx<numPlifs;qidx++) {
+   //   penalty_struct p;
+   //   p = qualityScoresAllPaths[qidx+numPlifs*z];
+   //   printf("%d ",qidx);
+   //   for(pidx=0;pidx<p.len;pidx++)
+   //      printf("%f ",pidx,p.penalties[pidx]);
+   //   printf("\n");
+   //}
+
+   //if(DNA_ARRAY != 0) {
+   //    delete[] DNA_ARRAY;
+   //    delete[] EST_ARRAY;
+   // }
+
+   //DNA_ARRAY = new double[result_length];
+   //EST_ARRAY = new double[result_length];
+
+   // //backtracking
+   // int i = max_score_positions[2*z] ; //i (est)
+   // int j = max_score_positions[2*z +1] ; //j (dna)
+   // int prev_i = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_i; 
+   // int prev_j = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_j;
+   // int prev_m_no = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_matrix_no;
+   // 
+   // for (int ii=result_length; ii>0; ii--) {
+   //   if ((prev_i == (i-1)) && (prev_j == (j-1))) { // match or mismatch
+       //DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
+       //EST_ARRAY[ii-1] = check_char(est[i-1]) ;
+   //   }
+   //   else if ((prev_i == (i)) && (prev_j == (j-1))) {// gap on EST_ARRAY
+       //DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
+       //EST_ARRAY[ii-1] = 0 ; //gap
+   //   }
+   //   else if ((prev_i == (i-1)) && (prev_j == (j))) { // gap on DNA_ARRAY
+       //DNA_ARRAY[ii-1] = 0 ; //gap
+       //EST_ARRAY[ii-1] = check_char(est[i-1]) ;
+   //   }
+   //   else {// splice site
+       //for (j; j > prev_j; j--) {
+       //  DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
+       //  EST_ARRAY[ii-1] = 6 ; //intron
+       //  ii-- ;
+       //}
+       //ii++ ; // last ii-- too much (because done in for-loop)       
+   //   }
+   //   
+   //   i = prev_i;
+   //   j = prev_j;    
+   //   prev_i = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_i; 
+   //   prev_j = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_j; 
+   //   prev_m_no = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_matrix_no;
+   // }
+   // 
+   } //end of z
+
+   //if(DNA_ARRAY != 0) {
+   // delete[] DNA_ARRAY;
+   // delete[] EST_ARRAY;
+   // }
+
+   for (int i=nr_paths-1;i>=0; i--)
+      delete[] matrices[i];
+}
+
+void Alignment::getAlignmentResults(int* s_align, int* e_align,
+      int* mmatrix_p, double* alignscores, double* qScores) {
+
+   int idx;
+   for(idx=0; idx<splice_align_size; idx++)
+      s_align[idx] = splice_align[idx];
+
+   for(idx=0; idx<est_align_size; idx++)
+      e_align[idx] =  est_align[idx];
+
+   for(idx=0; idx<mmatrix_param_size; idx++)
+      mmatrix_p[idx] = mmatrix_param[idx];
+
+   for(idx=0; idx<alignmentscores_size; idx++)
+      alignscores[idx] = alignmentscores[idx];
+   
+   if (use_quality_scores) {
+      penalty_struct currentPlif;
+      int ctr=0;
+      for (int z=0; z<nr_paths; z++) {
+         for(int estChar=1;estChar<6;estChar++) {
+            for(int dnaChar=0;dnaChar<6;dnaChar++) {
+               int currentPos = (estChar-1)*6+dnaChar;
+               currentPlif = qualityScoresAllPaths[z][currentPos];
+               for(int pidx=0; pidx<currentPlif.len; pidx++) {
+                  qScores[ctr] = currentPlif.penalties[pidx];
+                  ctr++;
+               }
+      }}}
+   }
+
+   //for(idx=0; idx<numPathsPlifs; idx++) {
+   //   currentPlif = qualityScoresAllPaths[idx];
+   //   //printf("Size is %dx%d\n",numPathsPlifs,currentPlif.len);
+   //   for(pidx=0; pidx<currentPlif.len; pidx++) {
+   //      qScores[ctr] = currentPlif.penalties[pidx];
+   //      ctr++;
+   //   }
+   //}
+
+   //printf("\nctr is %d\n",ctr);
+   //printf("Leaving getAlignmentResults...\n");
+}
diff --git a/dyn_prog/qpalma_dp.h b/dyn_prog/qpalma_dp.h
new file mode 100644 (file)
index 0000000..4049221
--- /dev/null
@@ -0,0 +1,133 @@
+#ifndef _QPALMA_DP_H_
+#define _QPALMA_DP_H_
+
+#include "penalty_info.h"
+
+struct ArrayElem { //24B
+  double score;
+  int prev_j;
+  int prev_i;
+  int prev_matrix_no;   
+  bool isSplice;
+} ;
+
+struct pre_score { //24B
+  double value; //8
+  int prev_i; //4
+  int prev_j; //4
+  int prev_matrix_no;//4
+  bool issplice;//4
+};
+
+typedef struct pre_score Pre_score;
+
+typedef struct Align_pair { //24B
+  double value ; 
+  bool issplice;
+  int previ,prevj ;
+  char est_char;
+  char dna_char;
+};
+
+void fill_matrix (int nr_paths, Pre_score* matrices[], int len_est, int len_dna, char* est, char* dna, double* prb, penalty_struct* functions, double* matchmatrix, penalty_struct* qualityScores, double* donor, double* acceptor, bool remove_duplicate_scores, int nr_donor_sites, int* donor_sites, int* max_score_positions, mode currentMode);
+
+int check_char(char base);
+void fassert(bool exp,int line, char* file);
+#define FA(expr) (fassert(expr,__LINE__,__FILE__))
+
+bool result_align (Pre_score* matrices[], int matrixnr, int i,  int j, int* result_length_ptr, char* est, char* dna, double* prb, double* chastity,  int* s_align, int* e_align, int* m_param, double* alignmentscores, int* max_score_positions, penalty_struct* qparam, mode currentMode);
+
+extern void print_align(Pre_score* matrix, int length_est,  int length_dna, Align_pair* vektor, int result_length, int print_matrix);
+
+
+/* 
+ * The function myalign calculates a new alignment given the scoring scheme
+ * Its input arguments are:
+ *
+ * num_path(id)               ->    an integer specifying which alignment has to be done
+ * dna                        ->    
+ * est                        ->
+ * h                          ->
+ * mmatrix                    -> 
+ * donor                      ->
+ * acceptor                   ->
+ * remove_duplicate_scores    -> a boolean
+ * print_matrix               -> a boolean
+ *
+ * [SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest] = myalign_local(...
+ *
+ * the idea of the qualityScores array is as follows
+ *
+ * consider a matrix of 24 plifs
+ * 
+ * -> row major
+ *
+ */
+
+class Alignment {
+
+   private:
+      int* splice_align;
+      int* est_align;
+      int* mmatrix_param;
+      double* alignmentscores;
+      struct penalty_struct** qualityScoresAllPaths;
+
+      int dna_len;
+      int est_len;
+      int mlen;
+      int nr_paths;
+
+      int numQualSuppPoints;
+      int numPlifs;
+      bool use_quality_scores;
+
+      uint splice_align_size ;
+      uint est_align_size ;
+      uint mmatrix_param_size ;
+      uint alignmentscores_size ;
+      uint numPathsPlifs ;
+
+      INT len;
+      REAL *limits;
+      REAL *penalties;
+      INT max_len;
+      INT min_len;
+      REAL *cache;
+      enum ETransformType transform ;
+      INT id;
+      char * name;
+      INT use_svm;
+
+   public:
+      Alignment(int numQPlifs ,int numq, bool use_qscores);
+      ~Alignment() {
+         if(splice_align != 0)
+            delete[] splice_align;
+
+         if(est_align != 0)
+            delete[] est_align;
+
+         if(mmatrix_param != 0)
+            delete[] mmatrix_param;
+
+         if(alignmentscores != 0)
+            delete[] alignmentscores;
+
+         if(qualityScoresAllPaths != 0)
+            delete[] qualityScoresAllPaths;
+      }
+
+      void myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
+      int est_len_p, double* prb, double* chastity, struct penalty_struct h, 
+      double* matchmatrix, int mm_len, double* donor, int d_len, double* acceptor,
+      int a_len, struct penalty_struct* qualityScores, bool remove_duplicate_scores,
+      bool print_matrix);
+
+      void getDNAEST();
+      void getAlignmentResults(int* s_align, int* e_align,
+      int* mmatrix_p, double* alignscores, double* qScores);
+};
+
+#endif  // _QPALMA_DP_H_
+
diff --git a/dyn_prog/result_align.cpp b/dyn_prog/result_align.cpp
new file mode 100644 (file)
index 0000000..d0c6944
--- /dev/null
@@ -0,0 +1,246 @@
+#include "assert.h"
+#include "fill_matrix.h"
+#include "debug_tools.h"
+
+using namespace std;
+
+void increaseFeatureCount(penalty_struct* qparam, int dnaChar, int estChar, double estprb) {
+
+   FA(estChar > 0 && estChar < 6);
+   FA(dnaChar > -1 && dnaChar < 6);
+
+   int currentPos = (estChar-1)*6+dnaChar;
+
+   penalty_struct currentStruct = qparam[currentPos];
+
+   //printf("Current index %d est/dna: %d %d score %f\n",currentPos,estChar,dnaChar,estprb);
+
+   //printf("before\n");
+   //int p_idx;
+   //for(p_idx=0;p_idx<currentStruct.len;p_idx++) {
+   //   printf("%f ",currentStruct.limits[p_idx]);
+   //}
+   //printf("\n");
+
+   //for(p_idx=0;p_idx<currentStruct.len;p_idx++) {
+   //   printf("%f ",currentStruct.penalties[p_idx]);
+   //}
+   //printf("\n");
+
+   double value = estprb;
+   int Lower = 0;
+   int idx;
+
+   for (idx=0;idx<currentStruct.len;idx++) {
+      if (currentStruct.limits[idx] <= value)
+         Lower++;
+   }
+
+   if (Lower == 0) {
+         currentStruct.penalties[0] += 1;
+         qparam[currentPos] = currentStruct;
+         return;
+   }
+
+   if (Lower == currentStruct.len) {
+         currentStruct.penalties[currentStruct.len-1] += 1;
+         qparam[currentPos] = currentStruct;
+         return;
+   }
+
+   Lower -= 1;
+   int Upper = Lower+1; // x-werte bleiben fest
+
+   double weightup  = 1.0*(value - currentStruct.limits[Lower]) / (currentStruct.limits[Upper] - currentStruct.limits[Lower]);
+   double weightlow = 1.0*(currentStruct.limits[Upper] - value) / (currentStruct.limits[Upper] - currentStruct.limits[Lower]);
+   currentStruct.penalties[Upper] += weightup;
+   currentStruct.penalties[Lower] += weightlow; 
+
+   //printf("estprb/Lower/Upper %f %d %d\n",estprb,Lower,Upper);
+   //printf("after\n");
+   //for(p_idx=0;p_idx<currentStruct.len;p_idx++) {
+   //   printf("%f ",currentStruct.limits[p_idx]);
+   //}
+   //printf("\n");
+   //for(p_idx=0;p_idx<currentStruct.len;p_idx++)
+   //   printf("%f ",currentStruct.penalties[p_idx]);
+   //printf("\n");
+   qparam[currentPos] = currentStruct;
+}
+
+bool result_align(Pre_score* matrices[], int z, int est_len, int dna_len, int* result_length_ptr, char* est, char* dna, double* prb, double* chastity, int* s_align, int* e_align, int* mparam, double* alignmentscores, int* max_score_positions, penalty_struct* qparam, mode currentMode)
+//Gibt 1 zurueck, wenn kein weiterer Pfad existiert, sonst 0
+
+{  
+  const int mlen=6; // length of matchmatrix
+  int startend[4] ; // [i_start, j_start, i_end, j_end] ;
+  startend[3] = max_score_positions[2*z] ; //i (est)
+  startend[4] = max_score_positions[2*z +1] ; //j (dna)
+  //printf("in result_align: z=%d:(%d, %d)\n", z, startend[3], startend[4]) ;
+  int dnanum ; //using check_char: letter -> number
+  int estnum ; //using check_char: letter -> number
+
+  double prbnum ;
+  double chastitynum ;
+
+  int dna_pos ;
+  int est_pos ;
+  int splice_state ;
+  int est_state ;
+  for (dna_pos=dna_len-1; dna_pos>startend[4]; dna_pos--) {
+    s_align[dna_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
+  }
+  for (est_pos=est_len-1; est_pos>startend[3]; est_pos--) {
+    e_align[est_pos-1] = 4 ;
+  }
+  //splice_align: 0: exon, 1: donor, 2: acceptor, 3: intro,. 4: dangling end
+
+  /* starting point for backtracking */
+  int i = startend[3] ; 
+  int j = startend[4] ;
+  int act_m_no  = z ;
+  alignmentscores[z] = ((Pre_score*)matrices[z] + i*dna_len +j)->value;
+  //printf("alignment_score for z=%d: %3.8f\n", z, alignmentscores[z]) ;
+
+  int prev_i    = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_i; 
+  int prev_j    = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_j;
+  int prev_m_no = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_matrix_no;
+
+  //printf("Current z is %d\n",z);
+
+  if(!(isfinite(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value)))
+  {
+    cout << "No additional path left (inf)!\n";
+    return 1;
+  }
+
+  if(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value==0) {
+    cout << "No additional path left (0)!\n";
+    return 1;
+  }
+
+  assert(dna_pos == j) ;
+  assert(est_pos == i) ;
+
+  splice_state = 0 ; //exon
+  est_state = 1 ; //[112222111 ... for exons of length 2,4,3 ...
+
+  //compute length of alignment (between max(m,n) and m+n)     
+  //local_alignment: backtracking to first Zero: look at actual value
+  while(!(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value)==0.0) {
+    
+    //just some info
+    //if ((z>0) && (!(z==prev_m_no))) {
+    //  printf("(%d,%d): next step in next matrix\n", i,j) ;
+    //}
+
+    if ((prev_i == (i-1)) && (prev_j == (j-1))) { // match or mismatch
+      (*result_length_ptr)= (*result_length_ptr) + 1;
+      
+      s_align[dna_pos-1] = splice_state; 
+      dna_pos-- ;
+      e_align[est_pos-1] = est_state ; //1 or 2, depended
+      est_pos-- ;
+      
+      dnanum = check_char(dna[j-1]) ; //-1 because index starts with 0
+      estnum = check_char(est[i-1]) ; //-1 because index starts with 0
+
+      prbnum = prb[i-1];
+      chastitynum = chastity[i-1];
+
+      //printf("Position %d,%d est,dna: %d,%d\n",i,j,estnum,dnanum);
+
+      if(currentMode == USE_QUALITY_SCORES)
+         increaseFeatureCount(qparam,dnanum,estnum,prbnum);
+      else
+         mparam[mlen*dnanum+estnum] ++ ;
+
+    }
+    else if ((prev_i == (i)) && (prev_j == (j-1))) {// gap on EST
+      (*result_length_ptr)= (*result_length_ptr) + 1;
+      
+      s_align[dna_pos-1] = splice_state; 
+      dna_pos-- ;
+
+      dnanum = check_char(dna[j-1]) ; //-1 because index starts with 0
+      estnum = 0 ; //gap
+
+      if(currentMode == USE_QUALITY_SCORES)
+         mparam[dnanum] ++ ;
+      else
+         mparam[mlen*dnanum +estnum] ++ ;
+
+    }
+    else if ((prev_i == (i-1)) && (prev_j == (j))) { // gap on DNA
+      (*result_length_ptr)= (*result_length_ptr) + 1;
+      
+      e_align[est_pos-1] = est_state ;  //1 or 2, depended
+      est_pos-- ;
+
+      dnanum = 0 ; //gap
+      estnum = check_char(est[i-1]) ; //-1 because index starts with 0
+
+      prbnum = prb[i-1];
+      chastitynum = chastity[i-1];
+
+      //printf("Position %d,%d est,dna: %d,%d\n",i,j,estnum,dnanum);
+
+      if(currentMode == USE_QUALITY_SCORES)
+         increaseFeatureCount(qparam,dnanum,estnum,prbnum);
+      else
+         mparam[mlen*dnanum +estnum] ++ ;
+    }
+    else {// splice site
+      (*result_length_ptr) =  (*result_length_ptr) + (j - prev_j);
+      if (((Pre_score*)matrices[act_m_no] + i*dna_len + j)->issplice==0) {
+       printf("matrices[act_m_no](i,j): %3.15f\n",((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value) ;
+       printf("matrices[prev_m_no](i,j): %3.15f\n",((Pre_score*)matrices[prev_m_no] + i*dna_len +j)->value) ;
+       printf("issplice(act): %d\n",((Pre_score*)matrices[act_m_no] + i*dna_len + j)->issplice) ;
+       printf("issplice(prev): %d\n",((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->issplice) ;
+       printf("act_m_no: %d, prev_m_no: %d\n", act_m_no, prev_m_no) ;
+       printf("j: %d -> prev_j:%d\n", j, prev_j) ;
+       printf("i: %d -> prev_i:%d\n", i, prev_i) ;
+      }
+      if (est_state==1) { //was exon labeled "1"
+       est_state = 2 ; //new exon is labeled "2"
+      }
+      else {
+       est_state = 1 ; //last exon was labeled "2", new exon is labeled "1"
+      }
+      for (j; j > prev_j; j--) {
+       if (splice_state == 0) { //coming from exon
+         splice_state = 2 ; //first intron_state: acceptor
+       } else {
+         splice_state = 3 ; //intron
+       }
+       if (j-1 == prev_j) { //last intron_state: donor
+         splice_state = 1 ;//donor
+       }
+       s_align[dna_pos-1] = splice_state; 
+       dna_pos-- ;
+      }
+      splice_state = 0 ; //exon again
+      
+    }
+   
+    startend[1] = i;
+    startend[2] = j;
+    i = prev_i;
+    j = prev_j;
+    act_m_no = prev_m_no ;
+    
+    prev_i    = ((Pre_score*)matrices[act_m_no] + i*dna_len + j)->prev_i; 
+    prev_j    = ((Pre_score*)matrices[act_m_no] + i*dna_len + j)->prev_j; 
+    prev_m_no = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_matrix_no;
+  }
+  for (dna_pos; dna_pos>0; dna_pos--) {
+    s_align[dna_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
+  }
+  for (est_pos; est_pos>0; est_pos--) {
+    e_align[est_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
+  }
+
+  //printf("(%d, %d) --> (%d, %d)\n", startend[1], startend[2], startend[3], startend[4]) ;
+  //printf("result_length: %d\n", *result_length_ptr) ;
+  return 0;
+}
diff --git a/scripts/qpalma.py b/scripts/qpalma.py
deleted file mode 100644 (file)
index e32f8aa..0000000
+++ /dev/null
@@ -1,784 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-###########################################################
-#
-# The QPalma project aims at extending the Palma project 
-# to be able to use Solexa reads toegether with their 
-# quality scores.
-# 
-# This file represents the conversion of the main matlab 
-# training loop for Palma to python.
-# 
-# Author: Fabio De Bona
-# 
-###########################################################
-
-import sys
-import subprocess
-import scipy.io
-import pdb
-import os.path
-
-from numpy.matlib import mat,zeros,ones,inf
-from numpy.linalg import norm
-
-import QPalmaDP
-import qpalma
-from qpalma.SIQP_CPX import SIQPSolver
-from qpalma.DataProc import *
-from qpalma.generateEvaluationData import *
-from qpalma.computeSpliceWeights import *
-from qpalma.set_param_palma import *
-from qpalma.computeSpliceAlignWithQuality import *
-from qpalma.penalty_lookup_new import *
-from qpalma.compute_donacc import *
-from qpalma.export_param import *
-from qpalma.TrainingParam import Param
-from qpalma.Plif import Plf
-
-from qpalma.tools.splicesites import getDonAccScores
-
-from qpalma.Configuration import *
-
-class para:
-   pass
-
-class QPalma:
-   """
-   A training method for the QPalma project
-   """
-   
-   def __init__(self):
-      self.ARGS = Param()
-      self.logfh = open('qpalma.log','w+')
-
-      #gen_file= '%s/genome.config' % self.ARGS.basedir
-      #ginfo_filename = 'genome_info.pickle'
-      #self.genome_info = fetch_genome_info(ginfo_filename)
-      #self.plog('genome_info.basedir is %s\n'%self.genome_info.basedir)
-
-      #self.ARGS.train_with_splicesitescoreinformation = False
-
-      # Load the whole dataset 
-      if Conf.mode == 'normal':
-         #Sequences, Acceptors, Donors, Exons, Ests, Noises = paths_load_data_pickle('training',self.genome_info,self.ARGS)
-         Sequences, Acceptors, Donors, Exons, Ests, Qualities = loadArtificialData(1000)
-         self.use_quality_scores = False
-
-      elif Conf.mode == 'using_quality_scores':
-
-         filename = 'real_dset_%s'% 'recent'
-         if True:# not os.path.exists(filename):
-            Sequences, Acceptors, Donors, Exons, Ests, Qualities, SplitPos = paths_load_data_solexa('training',None,self.ARGS)
-
-            end = 1000
-            self.Sequences   = Sequences[:end]
-            self.Exons       = Exons[:end]
-            self.Ests        = Ests[:end]
-            self.Qualities   = Qualities[:end]
-            self.Acceptors   = Acceptors[:end]
-            self.Donors      = Donors[:end]
-
-            self.Donors, self.Acceptors = getDonAccScores(Sequences)
-            #dset = Dataset()
-         else:
-            dset = cPickle.load(open(filename))
-            Sequences = dset.Sequences
-            Acceptors = dset.Acceptors
-            Donors = dset.Donors
-            Exons =  dset.Exons
-            Ests = dset.Ests
-            Qualities = dset.Qualities
-
-         #Sequences, Acceptors, Donors, Exons, Ests, Qualities = loadArtificialData(1000)
-         #Sequences, Acceptors, Donors, Exons, Ests, Noises = paths_load_data_pickle('training',self.genome_info,self.ARGS)
-         #Qualities = []
-         #for i in range(len(Ests)):
-         #   Qualities.append([40]*len(Ests[i]))
-         self.use_quality_scores = True
-      else:
-         assert(False)
-
-      # number of training instances
-      N = len(Sequences) 
-      self.numExamples = N
-      assert N == len(Acceptors) and N == len(Acceptors) and N == len(Exons)\
-      and N == len(Ests), 'The Seq,Accept,Donor,.. arrays are of different lengths'
-      self.plog('Number of training examples: %d\n'% N)
-      print 'Number of features: %d\n'% Conf.numFeatures
-
-   def plog(self,string):
-      self.logfh.write(string)
-      self.logfh.flush()
-
-   def train(self):
-      beg = Conf.training_begin
-      end = Conf.training_end
-
-      Sequences   = self.Sequences[beg:end]
-      Exons       = self.Exons[beg:end]
-      Ests        = self.Ests[beg:end]
-      Qualities   = self.Qualities[beg:end]
-      Acceptors   = self.Acceptors[beg:end]
-      Donors      = self.Donors[beg:end]
-
-      iteration_steps         = Conf.iter_steps ; #upper bound on iteration steps
-      remove_duplicate_scores = Conf.remove_duplicate_scores 
-      print_matrix            = Conf.print_matrix 
-      anzpath                 = Conf.anzpath 
-
-      # Initialize parameter vector  / param = numpy.matlib.rand(126,1)
-      param = Conf.fixedParam 
-
-      # Set the parameters such as limits penalties for the Plifs
-      [h,d,a,mmatrix,qualityPlifs] = set_param_palma(param,self.ARGS.train_with_intronlengthinformation)
-
-      # delete splicesite-score-information
-      #if not self.ARGS.train_with_splicesitescoreinformation:
-      #   for i in range(len(Acceptors)):
-      #      if Acceptors[i] > -20:
-      #         Acceptors[i] = 1
-      #      if Donors[i] >-20:
-      #         Donors[i] = 1
-
-      # Initialize solver 
-      if Conf.USE_OPT:
-         self.plog('Initializing problem...\n')
-         solver = SIQPSolver(Conf.numFeatures,self.numExamples,Conf.C,self.logfh)
-
-      # stores the number of alignments done for each example (best path, second-best path etc.)
-      num_path = [anzpath]*self.numExamples
-      # stores the gap for each example
-      gap      = [0.0]*self.numExamples
-
-      #############################################################################################
-      # Training
-      #############################################################################################
-      self.plog('Starting training...\n')
-
-      donSP       = Conf.numDonSuppPoints
-      accSP       = Conf.numAccSuppPoints
-      lengthSP    = Conf.numLengthSuppPoints
-      mmatrixSP   = Conf.sizeMatchmatrix[0]\
-      *Conf.sizeMatchmatrix[1]
-      numq        = Conf.numQualSuppPoints
-      totalQualSP = Conf.totalQualSuppPoints
-
-      currentPhi = zeros((Conf.numFeatures,1))
-      totalQualityPenalties = zeros((totalQualSP,1))
-
-      iteration_nr = 0
-      param_idx = 0
-      const_added_ctr = 0
-      while True:
-         if iteration_nr == iteration_steps:
-            break
-
-         for exampleIdx in range(self.numExamples):
-            if (exampleIdx%100) == 0:
-               print 'Current example nr %d' % exampleIdx
-
-            dna = Sequences[exampleIdx] 
-            est = Ests[exampleIdx] 
-
-            if Conf.mode == 'normal':
-               quality = [40]*len(est)
-
-            if Conf.mode == 'using_quality_scores':
-               quality = Qualities[exampleIdx]
-
-            exons = Exons[exampleIdx] 
-            # NoiseMatrix = Noises[exampleIdx] 
-            don_supp = Donors[exampleIdx] 
-            acc_supp = Acceptors[exampleIdx] 
-
-            # Berechne die Parameter des wirklichen Alignments (but with untrained d,a,h ...)    
-            trueSpliceAlign, trueWeightMatch, trueWeightQuality = computeSpliceAlignWithQuality(dna, exons)
-            
-            #print 'trueWeights' 
-            # Calculate the weights
-            trueWeightDon, trueWeightAcc, trueWeightIntron = computeSpliceWeights(d, a, h, trueSpliceAlign, don_supp, acc_supp)
-            trueWeight = numpy.vstack([trueWeightIntron, trueWeightDon, trueWeightAcc, trueWeightMatch, trueWeightQuality])
-
-            currentPhi[0:donSP]                                               = mat(d.penalties[:]).reshape(donSP,1)
-            currentPhi[donSP:donSP+accSP]                                     = mat(a.penalties[:]).reshape(accSP,1)
-            currentPhi[donSP+accSP:donSP+accSP+lengthSP]                      = mat(h.penalties[:]).reshape(lengthSP,1)
-            currentPhi[donSP+accSP+lengthSP:donSP+accSP+lengthSP+mmatrixSP]   = mmatrix[:]
-
-            #pdb.set_trace()
-            if Conf.mode == 'using_quality_scores':
-               totalQualityPenalties = param[-totalQualSP:]
-               currentPhi[donSP+accSP+lengthSP+mmatrixSP:]                    = totalQualityPenalties[:]
-
-            # Calculate w'phi(x,y) the total score of the alignment
-            trueAlignmentScore = (trueWeight.T * currentPhi)[0,0]
-
-            # The allWeights vector is supposed to store the weight parameter
-            # of the true alignment as well as the weight parameters of the
-            # num_path[exampleIdx] other alignments
-            allWeights = zeros((Conf.numFeatures,num_path[exampleIdx]+1))
-            allWeights[:,0] = trueWeight[:,0]
-
-            AlignmentScores = [0.0]*(num_path[exampleIdx]+1)
-            AlignmentScores[0] = trueAlignmentScore
-
-            ################## Calculate wrong alignment(s) ######################
-
-            # Compute donor, acceptor with penalty_lookup_new
-            # returns two double lists
-            donor, acceptor = compute_donacc(don_supp, acc_supp, d, a)
-
-            #myalign wants the acceptor site on the g of the ag
-            acceptor = acceptor[1:]
-            acceptor.append(-inf)
-
-            # for now we don't use donor/acceptor scores
-
-            #donor = [-inf] * len(donor)
-            #acceptor = [-inf] * len(donor)
-
-            dna = str(dna)
-            est = str(est)
-            dna_len = len(dna)
-            est_len = len(est)
-
-            ps = h.convert2SWIG()
-
-            prb = QPalmaDP.createDoubleArrayFromList(quality)
-            chastity = QPalmaDP.createDoubleArrayFromList([.0]*est_len)
-
-            matchmatrix = QPalmaDP.createDoubleArrayFromList(mmatrix.flatten().tolist()[0])
-            mm_len = Conf.sizeMatchmatrix[0]*Conf.sizeMatchmatrix[1]
-
-            d_len = len(donor)
-            donor = QPalmaDP.createDoubleArrayFromList(donor)
-            a_len = len(acceptor)
-            acceptor = QPalmaDP.createDoubleArrayFromList(acceptor)
-
-            # Create the alignment object representing the interface to the C/C++ code.
-            currentAlignment = QPalmaDP.Alignment(Conf.numQualPlifs,Conf.numQualSuppPoints, self.use_quality_scores)
-
-            c_qualityPlifs = QPalmaDP.createPenaltyArrayFromList([elem.convert2SWIG() for elem in qualityPlifs])
-            #print 'Calling myalign...'
-            # calculates SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest
-            currentAlignment.myalign( num_path[exampleIdx], dna, dna_len,\
-             est, est_len, prb, chastity, ps, matchmatrix, mm_len, donor, d_len,\
-             acceptor, a_len, c_qualityPlifs, remove_duplicate_scores,
-             print_matrix)
-
-            #print 'After calling myalign...'
-            #print 'Calling getAlignmentResults...'
-
-            c_SpliceAlign       = QPalmaDP.createIntArrayFromList([0]*(dna_len*num_path[exampleIdx]))
-            c_EstAlign          = QPalmaDP.createIntArrayFromList([0]*(est_len*num_path[exampleIdx]))
-            c_WeightMatch       = QPalmaDP.createIntArrayFromList([0]*(mm_len*num_path[exampleIdx]))
-            c_DPScores   = QPalmaDP.createDoubleArrayFromList([.0]*num_path[exampleIdx])
-
-            c_qualityPlifsFeatures = QPalmaDP.createDoubleArrayFromList([.0]*(Conf.totalQualSuppPoints*num_path[exampleIdx]))
-
-            currentAlignment.getAlignmentResults(c_SpliceAlign, c_EstAlign,\
-            c_WeightMatch, c_DPScores, c_qualityPlifsFeatures)
-
-            #print 'After calling getAlignmentResults...'
-
-            newSpliceAlign = zeros((num_path[exampleIdx]*dna_len,1))
-            newEstAlign    = zeros((est_len*num_path[exampleIdx],1))
-            newWeightMatch = zeros((num_path[exampleIdx]*mm_len,1))
-            newDPScores    = zeros((num_path[exampleIdx],1))
-            newQualityPlifsFeatures = zeros((Conf.totalQualSuppPoints*num_path[exampleIdx],1))
-
-            #print 'newSpliceAlign'
-            for i in range(dna_len*num_path[exampleIdx]):
-               newSpliceAlign[i] = c_SpliceAlign[i]
-            #   print '%f' % (spliceAlign[i])
-
-            #print 'newEstAlign'
-            for i in range(est_len*num_path[exampleIdx]):
-               newEstAlign[i] = c_EstAlign[i]
-            #   print '%f' % (spliceAlign[i])
-
-            #print 'weightMatch'
-            for i in range(mm_len*num_path[exampleIdx]):
-               newWeightMatch[i] = c_WeightMatch[i]
-            #   print '%f' % (weightMatch[i])
-
-            #print 'ViterbiScores'
-            for i in range(num_path[exampleIdx]):
-               newDPScores[i] = c_DPScores[i]
-
-
-            if self.use_quality_scores:
-               for i in range(Conf.totalQualSuppPoints*num_path[exampleIdx]):
-                  newQualityPlifsFeatures[i] = c_qualityPlifsFeatures[i]
-
-            #  equals palma up to here
-
-            #print "Calling destructors"
-            del c_SpliceAlign
-            del c_EstAlign
-            del c_WeightMatch
-            del c_DPScores
-            del c_qualityPlifsFeatures
-            del currentAlignment
-
-            newSpliceAlign = newSpliceAlign.reshape(num_path[exampleIdx],dna_len)
-            newWeightMatch = newWeightMatch.reshape(num_path[exampleIdx],mm_len)
-            # Calculate weights of the respective alignments Note that we are
-            # calculating n-best alignments without hamming loss, so we
-            # have to keep track which of the n-best alignments correspond to
-            # the true one in order not to incorporate a true alignment in the
-            # constraints. To keep track of the true and false alignments we
-            # define an array true_map with a boolean indicating the
-            # equivalence to the true alignment for each decoded alignment.
-            true_map = [0]*(num_path[exampleIdx]+1)
-            true_map[0] = 1
-            path_loss   = [0]*(num_path[exampleIdx])
-
-            for pathNr in range(num_path[exampleIdx]):
-               #print 'decodedWeights' 
-               weightDon, weightAcc, weightIntron = computeSpliceWeights(d, a,\
-               h, newSpliceAlign[pathNr,:].flatten().tolist()[0], don_supp,\
-               acc_supp)
-
-               decodedQualityFeatures = zeros((Conf.totalQualSuppPoints,1))
-               for qidx in range(Conf.totalQualSuppPoints):
-                  decodedQualityFeatures[qidx] = newQualityPlifsFeatures[(pathNr*Conf.totalQualSuppPoints)+qidx]
-
-               #pdb.set_trace()
-
-               path_loss[pathNr] = 0
-               # sum up positionwise loss between alignments
-               for alignPosIdx in range(newSpliceAlign[pathNr,:].shape[1]):
-                  if newSpliceAlign[pathNr,alignPosIdx] != trueSpliceAlign[alignPosIdx]:
-                     path_loss[pathNr] += 1
-
-               #pdb.set_trace()
-
-               # Gewichte in restliche Zeilen der Matrix speichern
-               wp = numpy.vstack([weightIntron, weightDon, weightAcc, newWeightMatch[pathNr,:].T, decodedQualityFeatures])
-               allWeights[:,pathNr+1] = wp
-
-               hpen = mat(h.penalties).reshape(len(h.penalties),1)
-               dpen = mat(d.penalties).reshape(len(d.penalties),1)
-               apen = mat(a.penalties).reshape(len(a.penalties),1)
-               features = numpy.vstack([hpen, dpen, apen, mmatrix[:], totalQualityPenalties])
-
-               AlignmentScores[pathNr+1] = (allWeights[:,pathNr+1].T * features)[0,0]
-
-               # Check wether scalar product + loss equals viterbi score
-               #print 'Example nr.: %d, path nr. %d, scores: %f vs %f' % (exampleIdx,pathNr,newDPScores[pathNr,0], AlignmentScores[pathNr+1])
-
-               distinct_scores = False
-               if math.fabs(AlignmentScores[pathNr] - AlignmentScores[pathNr+1]) > 1e-5:
-                  distinct_scores = True
-               
-               #if not math.fabs(newDPScores[pathNr,0] - AlignmentScores[pathNr+1]) + [0,1][distinct_scores and (pathNr>0)] <= 1e-5:
-               if not math.fabs(newDPScores[pathNr,0] - AlignmentScores[pathNr+1]) <= 1e-5:
-                  pdb.set_trace()
-
-               #  # if the pathNr-best alignment is very close to the true alignment consider it as true
-               if norm( allWeights[:,0] - allWeights[:,pathNr+1] ) < 1e-5:
-                  true_map[pathNr+1] = 1
-
-               # assert AlignmentScores[0] > max(AlignmentScores[1:]) + 1e-6, pdb.set_trace()
-
-               # the true label sequence should not have a larger score than the maximal one WHYYYYY?
-               # this means that all n-best paths are to close to each other 
-               # we have to extend the n-best search to a (n+1)-best
-               if len([elem for elem in true_map if elem == 1]) == len(true_map):
-                  num_path[exampleIdx] = num_path[exampleIdx]+1
-
-               # Choose true and first false alignment for extending A
-               firstFalseIdx = -1
-               for map_idx,elem in enumerate(true_map):
-                  if elem == 0:
-                     firstFalseIdx = map_idx
-                     break
-
-               # if there is at least one useful false alignment add the
-               # corresponding constraints to the optimization problem
-               if firstFalseIdx != -1:
-                  trueWeights       = allWeights[:,0]
-                  firstFalseWeights = allWeights[:,firstFalseIdx]
-                  differenceVector  = trueWeights - firstFalseWeights
-                  #pdb.set_trace()
-
-                  if Conf.USE_OPT:
-                     const_added = solver.addConstraint(differenceVector, exampleIdx)
-                     const_added_ctr += 1
-               #
-               # end of one example processing 
-               #
-
-            # call solver every nth example //added constraint
-            if exampleIdx != 0 and exampleIdx % 100 == 0 and Conf.USE_OPT:
-               objValue,w,self.slacks = solver.solve()
-      
-               print "objValue is %f" % objValue
-
-               sum_xis = 0
-               for elem in self.slacks:
-                  sum_xis +=  elem
-   
-               for i in range(len(param)):
-                  param[i] = w[i]
-
-               #pdb.set_trace()
-               cPickle.dump(param,open('param_%d.pickle'%param_idx,'w+'))
-               param_idx += 1
-               [h,d,a,mmatrix,qualityPlifs] = set_param_palma(param,self.ARGS.train_with_intronlengthinformation)
-
-         #
-         # end of one iteration through all examples
-         #
-         iteration_nr += 1
-
-      #
-      # end of optimization 
-      #  
-      print 'Training completed'
-
-      pa = para()
-      pa.h = h
-      pa.d = d
-      pa.a = a
-      pa.mmatrix = mmatrix
-      pa.qualityPlifs = qualityPlifs
-
-      cPickle.dump(param,open('param_%d.pickle'%param_idx,'w+'))
-      #cPickle.dump(pa,open('elegans.param','w+'))
-      self.logfh.close()
-
-
-
-   def predict(self):
-      beg = Conf.prediction_begin
-      end = Conf.prediction_end
-
-      Sequences   = self.Sequences[beg:end]
-      Exons       = self.Exons[beg:end]
-      Ests        = self.Ests[beg:end]
-      Qualities   = self.Qualities[beg:end]
-      Acceptors   = self.Acceptors[beg:end]
-      Donors      = self.Donors[beg:end]
-
-      remove_duplicate_scores = Conf.remove_duplicate_scores 
-      print_matrix            = Conf.print_matrix 
-      anzpath                 = Conf.anzpath 
-
-      param_filename='/fml/ag-raetsch/home/fabio/svn/projects/QPalma/scripts/param_28.pickle'
-      param = load_param(param_filename)
-
-      # Set the parameters such as limits penalties for the Plifs
-      [h,d,a,mmatrix,qualityPlifs] = set_param_palma(param,self.ARGS.train_with_intronlengthinformation)
-
-      #############################################################################################
-      # Prediction
-      #############################################################################################
-      self.plog('Starting prediction...\n')
-
-      donSP       = Conf.numDonSuppPoints
-      accSP       = Conf.numAccSuppPoints
-      lengthSP    = Conf.numLengthSuppPoints
-      mmatrixSP   = Conf.sizeMatchmatrix[0]\
-      *Conf.sizeMatchmatrix[1]
-      numq        = Conf.numQualSuppPoints
-      totalQualSP = Conf.totalQualSuppPoints
-
-      currentPhi = zeros((Conf.numFeatures,1))
-      totalQualityPenalties = zeros((totalQualSP,1))
-
-      total_up_off = []
-      total_down_off = []
-
-      for exampleIdx in range(self.numExamples):
-         dna = Sequences[exampleIdx] 
-         est = Ests[exampleIdx] 
-         currentSplitPos = SplitPos[exampleIdx]
-
-         if Conf.mode == 'normal':
-            quality = [40]*len(est)
-
-         if Conf.mode == 'using_quality_scores':
-            quality = Qualities[exampleIdx]
-
-         exons = Exons[exampleIdx] 
-         # NoiseMatrix = Noises[exampleIdx] 
-         don_supp = Donors[exampleIdx] 
-         acc_supp = Acceptors[exampleIdx] 
-
-         if exons[-1,1] > len(dna):
-            continue
-
-         # Berechne die Parameter des wirklichen Alignments (but with untrained d,a,h ...)    
-         trueSpliceAlign, trueWeightMatch, trueWeightQuality = computeSpliceAlignWithQuality(dna, exons)
-         
-         # Calculate the weights
-         trueWeightDon, trueWeightAcc, trueWeightIntron = computeSpliceWeights(d, a, h, trueSpliceAlign, don_supp, acc_supp)
-         trueWeight = numpy.vstack([trueWeightIntron, trueWeightDon, trueWeightAcc, trueWeightMatch, trueWeightQuality])
-
-         currentPhi[0:donSP]                                               = mat(d.penalties[:]).reshape(donSP,1)
-         currentPhi[donSP:donSP+accSP]                                     = mat(a.penalties[:]).reshape(accSP,1)
-         currentPhi[donSP+accSP:donSP+accSP+lengthSP]                      = mat(h.penalties[:]).reshape(lengthSP,1)
-         currentPhi[donSP+accSP+lengthSP:donSP+accSP+lengthSP+mmatrixSP]   = mmatrix[:]
-
-         if Conf.mode == 'using_quality_scores':
-            totalQualityPenalties = param[-totalQualSP:]
-            currentPhi[donSP+accSP+lengthSP+mmatrixSP:]                    = totalQualityPenalties[:]
-
-         # Calculate w'phi(x,y) the total score of the alignment
-         trueAlignmentScore = (trueWeight.T * currentPhi)[0,0]
-
-         # The allWeights vector is supposed to store the weight parameter
-         # of the true alignment as well as the weight parameters of the
-         # 1 other alignments
-         allWeights = zeros((Conf.numFeatures,1+1))
-         allWeights[:,0] = trueWeight[:,0]
-
-         AlignmentScores = [0.0]*(1+1)
-         AlignmentScores[0] = trueAlignmentScore
-
-         ################## Calculate wrong alignment(s) ######################
-
-         # Compute donor, acceptor with penalty_lookup_new
-         # returns two double lists
-         donor, acceptor = compute_donacc(don_supp, acc_supp, d, a)
-
-         #myalign wants the acceptor site on the g of the ag
-         acceptor = acceptor[1:]
-         acceptor.append(-inf)
-
-         dna = str(dna)
-         est = str(est)
-         dna_len = len(dna)
-         est_len = len(est)
-
-         ps = h.convert2SWIG()
-
-         prb = QPalmaDP.createDoubleArrayFromList(quality)
-         chastity = QPalmaDP.createDoubleArrayFromList([.0]*est_len)
-
-         matchmatrix = QPalmaDP.createDoubleArrayFromList(mmatrix.flatten().tolist()[0])
-         mm_len = Conf.sizeMatchmatrix[0]*Conf.sizeMatchmatrix[1]
-
-         d_len = len(donor)
-         donor = QPalmaDP.createDoubleArrayFromList(donor)
-         a_len = len(acceptor)
-         acceptor = QPalmaDP.createDoubleArrayFromList(acceptor)
-
-         # Create the alignment object representing the interface to the C/C++ code.
-         currentAlignment = QPalmaDP.Alignment(Conf.numQualPlifs,Conf.numQualSuppPoints, self.use_quality_scores)
-
-         c_qualityPlifs = QPalmaDP.createPenaltyArrayFromList([elem.convert2SWIG() for elem in qualityPlifs])
-
-         # calculates SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest
-         currentAlignment.myalign(1, dna, dna_len,\
-          est, est_len, prb, chastity, ps, matchmatrix, mm_len, donor, d_len,\
-          acceptor, a_len, c_qualityPlifs, remove_duplicate_scores,
-          print_matrix)
-
-         c_SpliceAlign  = QPalmaDP.createIntArrayFromList([0]*(dna_len*1))
-         c_EstAlign     = QPalmaDP.createIntArrayFromList([0]*(est_len*1))
-         c_WeightMatch  = QPalmaDP.createIntArrayFromList([0]*(mm_len*1))
-         c_DPScores     = QPalmaDP.createDoubleArrayFromList([.0]*1)
-
-         c_qualityPlifsFeatures = QPalmaDP.createDoubleArrayFromList([.0]*(Conf.totalQualSuppPoints))
-
-         currentAlignment.getAlignmentResults(c_SpliceAlign, c_EstAlign,\
-         c_WeightMatch, c_DPScores, c_qualityPlifsFeatures)
-
-         newSpliceAlign = zeros((dna_len,1))
-         newEstAlign    = zeros((est_len,1))
-         newWeightMatch = zeros((mm_len,1))
-         newQualityPlifsFeatures = zeros((Conf.totalQualSuppPoints,1))
-
-         #print 'newSpliceAlign'
-         for i in range(dna_len):
-            newSpliceAlign[i] = c_SpliceAlign[i]
-         #   print '%f' % (spliceAlign[i])
-
-         #print 'newEstAlign'
-         for i in range(est_len):
-            newEstAlign[i] = c_EstAlign[i]
-         #   print '%f' % (spliceAlign[i])
-
-         #print 'weightMatch'
-         for i in range(mm_len):
-            newWeightMatch[i] = c_WeightMatch[i]
-         #   print '%f' % (weightMatch[i])
-
-         newDPScores = c_DPScores[0]
-
-         for i in range(Conf.totalQualSuppPoints):
-            newQualityPlifsFeatures[i] = c_qualityPlifsFeatures[i]
-
-         #print "Calling destructors"
-         del c_SpliceAlign
-         del c_EstAlign
-         del c_WeightMatch
-         del c_DPScores
-         del c_qualityPlifsFeatures
-         del currentAlignment
-
-         newSpliceAlign = newSpliceAlign.reshape(1,dna_len)
-         newWeightMatch = newWeightMatch.reshape(1,mm_len)
-         # Calculate weights of the respective alignments Note that we are
-         # calculating n-best alignments without hamming loss, so we
-         # have to keep track which of the n-best alignments correspond to
-         # the true one in order not to incorporate a true alignment in the
-         # constraints. To keep track of the true and false alignments we
-         # define an array true_map with a boolean indicating the
-         # equivalence to the true alignment for each decoded alignment.
-         true_map = [0]*2
-         true_map[0] = 1
-         pathNr = 0
-
-         weightDon, weightAcc, weightIntron = computeSpliceWeights(d, a, h, newSpliceAlign.flatten().tolist()[0], don_supp, acc_supp)
-
-         decodedQualityFeatures = zeros((Conf.totalQualSuppPoints,1))
-         for qidx in range(Conf.totalQualSuppPoints):
-            decodedQualityFeatures[qidx] = newQualityPlifsFeatures[qidx]
-
-         # Gewichte in restliche Zeilen der Matrix speichern
-         wp = numpy.vstack([weightIntron, weightDon, weightAcc, newWeightMatch.T, decodedQualityFeatures])
-         allWeights[:,pathNr+1] = wp
-
-         hpen = mat(h.penalties).reshape(len(h.penalties),1)
-         dpen = mat(d.penalties).reshape(len(d.penalties),1)
-         apen = mat(a.penalties).reshape(len(a.penalties),1)
-         features = numpy.vstack([hpen, dpen, apen, mmatrix[:], totalQualityPenalties])
-         AlignmentScores[pathNr+1] = (allWeights[:,pathNr+1].T * features)[0,0]
-
-         # if the pathNr-best alignment is very close to the true alignment consider it as true
-         if norm( allWeights[:,0] - allWeights[:,pathNr+1] ) < 1e-5:
-            true_map[pathNr+1] = 1
-
-         up_off,down_off = evaluateExample(dna,est,exons,newSpliceAlign,newEstAlign,currentSplitPos)
-         #evaluateExample(dna,est,exons,newSpliceAlign,newEstAlign,currentSplitPos)
-         #print up_off,down_off 
-
-         if up_off > -1:
-            total_up_off.append(up_off)
-            total_down_off.append(down_off)
-
-      total_up = 0
-      total_down = 0 
-      for idx in range(len(total_up_off)):
-         total_up    += total_up_off[idx]
-         total_down  += total_down_off[idx]
-         
-      total_up /= len(total_up_off)
-      total_down /= len(total_down_off)
-
-      print 'Mean up_off is %f' % total_up
-      print 'Mean down_off is %f' % total_down
-      print 'Correct up_off len is %d' % len([elem for elem in total_up_off if elem in [0,1]])
-      print 'Correct down_off len is %d' % len([elem for elem in total_down_off if elem in [0,1]])
-
-      print 'Prediction completed'
-      self.logfh.close()
-
-def fetch_genome_info(ginfo_filename):
-   if not os.path.exists(ginfo_filename):
-      cmd = ['']*4
-      cmd[0] = 'addpath /fml/ag-raetsch/home/fabio/svn/tools/utils'
-      cmd[1] = 'addpath /fml/ag-raetsch/home/fabio/svn/tools/genomes'
-      cmd[2] = 'genome_info = init_genome(\'%s\')' % gen_file
-      cmd[3] = 'save genome_info.mat genome_info'  
-      full_cmd = "matlab -nojvm -nodisplay -r \"%s; %s; %s; %s; exit\"" % (cmd[0],cmd[1],cmd[2],cmd[3])
-
-      obj = subprocess.Popen(full_cmd,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
-      out,err = obj.communicate()
-      assert err == '', 'An error occured!\n%s'%err
-
-      ginfo = scipy.io.loadmat('genome_info.mat')
-      cPickle.dump(self.genome_info,open(ginfo_filename,'w+'))
-      return ginfo['genome_info']
-
-   else:
-      return cPickle.load(open(ginfo_filename))
-
-def plifs2param(h,d,a,mmatrix,qualityPlifs):
-   donSP       = Conf.numDonSuppPoints
-   accSP       = Conf.numAccSuppPoints
-   lengthSP    = Conf.numLengthSuppPoints
-   mmatrixSP   = Conf.sizeMatchmatrix[0]\
-   *Conf.sizeMatchmatrix[1]
-
-
-   param = zeros((Conf.numFeatures,1))
-   param[0:donSP]                                               = mat(d.penalties[:]).reshape(donSP,1)
-   param[donSP:donSP+accSP]                                     = mat(a.penalties[:]).reshape(accSP,1)
-   param[donSP+accSP:donSP+accSP+lengthSP]                      = mat(h.penalties[:]).reshape(lengthSP,1)
-   param[donSP+accSP+lengthSP:donSP+accSP+lengthSP+mmatrixSP]   = mmatrix[:]
-
-   for idx in range(len(qualityPlifs)):
-      currentPlif       = qualityPlifs[idx]
-      begin             = lengthSP+donSP+accSP+mmatrixSP+(idx*Conf.numQualSuppPoints)
-      end               = lengthSP+donSP+accSP+mmatrixSP+((idx+1)*Conf.numQualSuppPoints)
-      param[begin:end]  = mat(currentPlif.penalties).reshape(Conf.numQualSuppPoints,1)
-
-   return param
-
-def load_param(filename):
-   param = None 
-   #try:
-   #   para = cPickle.load(open(filename))
-   #except:
-   #   print 'Error: Could not open parameter file!'
-   #   sys.exit(1)
-   #
-   #param = plifs2param(para.h,para.d,para.a,para.mmatrix,para.qualityPlifs)
-
-   param = cPickle.load(open(filename))
-   return param
-
-def evaluateExample(dna,est,exons,SpliceAlign,newEstAlign,spos):
-   newExons = []
-   oldElem = -1
-   SpliceAlign = SpliceAlign.flatten().tolist()[0]
-   SpliceAlign.append(-1)
-   for pos,elem in enumerate(SpliceAlign):
-      if pos == 0:
-         oldElem = -1
-      else:
-         oldElem = SpliceAlign[pos-1]
-
-      if oldElem != 0 and elem == 0: # start of exon
-         newExons.append(pos-1)
-
-      if oldElem == 0 and elem != 0: # end of exon
-         newExons.append(pos)
-
-   up_off   = -1
-   down_off = -1
-
-   if len(newExons) != 4:
-      acc = 0.0
-   else:
-      e1_begin,e1_end = newExons[0],newExons[1]
-      e2_begin,e2_end = newExons[2],newExons[3]
-
-      up_off   = int(math.fabs(e1_end - exons[0,1]))
-      down_off = int(math.fabs(e2_begin - exons[1,0]))
-
-   return up_off,down_off
-
-
-if __name__ == '__main__':
-
-   qpalma = QPalma()
-   if len(sys.argv) != 2:
-      print 'You have to choose between training or prediction mode:'
-      print 'python qpalma. py (train|predict)' 
-
-   mode = sys.argv[1]
-   assert mode in ['train','predict']
-
-   if mode == 'train':
-      qpalma.train()
-      
-   if mode == 'predict':
-      qpalma.predict()
-
diff --git a/scripts/qpalma_main.py b/scripts/qpalma_main.py
new file mode 100644 (file)
index 0000000..05d4f13
--- /dev/null
@@ -0,0 +1,784 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+###########################################################
+#
+# The QPalma project aims at extending the Palma project 
+# to be able to use Solexa reads toegether with their 
+# quality scores.
+# 
+# This file represents the conversion of the main matlab 
+# training loop for Palma to python.
+# 
+# Author: Fabio De Bona
+# 
+###########################################################
+
+import sys
+import subprocess
+import scipy.io
+import pdb
+import os.path
+
+from numpy.matlib import mat,zeros,ones,inf
+from numpy.linalg import norm
+
+import QPalmaDP
+import qpalma
+from qpalma.SIQP_CPX import SIQPSolver
+from qpalma.DataProc import *
+from qpalma.generateEvaluationData import *
+from qpalma.computeSpliceWeights import *
+from qpalma.set_param_palma import *
+from qpalma.computeSpliceAlignWithQuality import *
+from qpalma.penalty_lookup_new import *
+from qpalma.compute_donacc import *
+from qpalma.export_param import *
+from qpalma.TrainingParam import Param
+from qpalma.Plif import Plf
+
+from qpalma.tools.splicesites import getDonAccScores
+
+from qpalma.Configuration import *
+
+class para:
+   pass
+
+class QPalma:
+   """
+   A training method for the QPalma project
+   """
+   
+   def __init__(self):
+      self.ARGS = Param()
+      self.logfh = open('_qpalma.log','w+')
+
+      #gen_file= '%s/genome.config' % self.ARGS.basedir
+      #ginfo_filename = 'genome_info.pickle'
+      #self.genome_info = fetch_genome_info(ginfo_filename)
+      #self.plog('genome_info.basedir is %s\n'%self.genome_info.basedir)
+
+      #self.ARGS.train_with_splicesitescoreinformation = False
+
+      # Load the whole dataset 
+      if Conf.mode == 'normal':
+         #Sequences, Acceptors, Donors, Exons, Ests, Noises = paths_load_data_pickle('training',self.genome_info,self.ARGS)
+         Sequences, Acceptors, Donors, Exons, Ests, Qualities = loadArtificialData(1000)
+         self.use_quality_scores = False
+
+      elif Conf.mode == 'using_quality_scores':
+
+         filename = 'real_dset_%s'% 'recent'
+         if True:# not os.path.exists(filename):
+            Sequences, Acceptors, Donors, Exons, Ests, Qualities, SplitPos = paths_load_data_solexa('training',None,self.ARGS)
+
+            end = 1000
+            self.Sequences   = Sequences[:end]
+            self.Exons       = Exons[:end]
+            self.Ests        = Ests[:end]
+            self.Qualities   = Qualities[:end]
+            self.Acceptors   = Acceptors[:end]
+            self.Donors      = Donors[:end]
+
+            self.Donors, self.Acceptors = getDonAccScores(Sequences)
+            #dset = Dataset()
+         else:
+            dset = cPickle.load(open(filename))
+            Sequences = dset.Sequences
+            Acceptors = dset.Acceptors
+            Donors = dset.Donors
+            Exons =  dset.Exons
+            Ests = dset.Ests
+            Qualities = dset.Qualities
+
+         #Sequences, Acceptors, Donors, Exons, Ests, Qualities = loadArtificialData(1000)
+         #Sequences, Acceptors, Donors, Exons, Ests, Noises = paths_load_data_pickle('training',self.genome_info,self.ARGS)
+         #Qualities = []
+         #for i in range(len(Ests)):
+         #   Qualities.append([40]*len(Ests[i]))
+         self.use_quality_scores = True
+      else:
+         assert(False)
+
+      # number of training instances
+      N = len(Sequences) 
+      self.numExamples = N
+      assert N == len(Acceptors) and N == len(Acceptors) and N == len(Exons)\
+      and N == len(Ests), 'The Seq,Accept,Donor,.. arrays are of different lengths'
+      self.plog('Number of training examples: %d\n'% N)
+      print 'Number of features: %d\n'% Conf.numFeatures
+
+   def plog(self,string):
+      self.logfh.write(string)
+      self.logfh.flush()
+
+   def train(self):
+      beg = Conf.training_begin
+      end = Conf.training_end
+
+      Sequences   = self.Sequences[beg:end]
+      Exons       = self.Exons[beg:end]
+      Ests        = self.Ests[beg:end]
+      Qualities   = self.Qualities[beg:end]
+      Acceptors   = self.Acceptors[beg:end]
+      Donors      = self.Donors[beg:end]
+
+      iteration_steps         = Conf.iter_steps ; #upper bound on iteration steps
+      remove_duplicate_scores = Conf.remove_duplicate_scores 
+      print_matrix            = Conf.print_matrix 
+      anzpath                 = Conf.anzpath 
+
+      # Initialize parameter vector  / param = numpy.matlib.rand(126,1)
+      param = Conf.fixedParam 
+
+      # Set the parameters such as limits penalties for the Plifs
+      [h,d,a,mmatrix,qualityPlifs] = set_param_palma(param,self.ARGS.train_with_intronlengthinformation)
+
+      # delete splicesite-score-information
+      #if not self.ARGS.train_with_splicesitescoreinformation:
+      #   for i in range(len(Acceptors)):
+      #      if Acceptors[i] > -20:
+      #         Acceptors[i] = 1
+      #      if Donors[i] >-20:
+      #         Donors[i] = 1
+
+      # Initialize solver 
+      if Conf.USE_OPT:
+         self.plog('Initializing problem...\n')
+         solver = SIQPSolver(Conf.numFeatures,self.numExamples,Conf.C,self.logfh)
+
+      # stores the number of alignments done for each example (best path, second-best path etc.)
+      num_path = [anzpath]*self.numExamples
+      # stores the gap for each example
+      gap      = [0.0]*self.numExamples
+
+      #############################################################################################
+      # Training
+      #############################################################################################
+      self.plog('Starting training...\n')
+
+      donSP       = Conf.numDonSuppPoints
+      accSP       = Conf.numAccSuppPoints
+      lengthSP    = Conf.numLengthSuppPoints
+      mmatrixSP   = Conf.sizeMatchmatrix[0]\
+      *Conf.sizeMatchmatrix[1]
+      numq        = Conf.numQualSuppPoints
+      totalQualSP = Conf.totalQualSuppPoints
+
+      currentPhi = zeros((Conf.numFeatures,1))
+      totalQualityPenalties = zeros((totalQualSP,1))
+
+      iteration_nr = 0
+      param_idx = 0
+      const_added_ctr = 0
+      while True:
+         if iteration_nr == iteration_steps:
+            break
+
+         for exampleIdx in range(self.numExamples):
+            if (exampleIdx%100) == 0:
+               print 'Current example nr %d' % exampleIdx
+
+            dna = Sequences[exampleIdx] 
+            est = Ests[exampleIdx] 
+
+            if Conf.mode == 'normal':
+               quality = [40]*len(est)
+
+            if Conf.mode == 'using_quality_scores':
+               quality = Qualities[exampleIdx]
+
+            exons = Exons[exampleIdx] 
+            # NoiseMatrix = Noises[exampleIdx] 
+            don_supp = Donors[exampleIdx] 
+            acc_supp = Acceptors[exampleIdx] 
+
+            # Berechne die Parameter des wirklichen Alignments (but with untrained d,a,h ...)    
+            trueSpliceAlign, trueWeightMatch, trueWeightQuality = computeSpliceAlignWithQuality(dna, exons)
+            
+            #print 'trueWeights' 
+            # Calculate the weights
+            trueWeightDon, trueWeightAcc, trueWeightIntron = computeSpliceWeights(d, a, h, trueSpliceAlign, don_supp, acc_supp)
+            trueWeight = numpy.vstack([trueWeightIntron, trueWeightDon, trueWeightAcc, trueWeightMatch, trueWeightQuality])
+
+            currentPhi[0:donSP]                                               = mat(d.penalties[:]).reshape(donSP,1)
+            currentPhi[donSP:donSP+accSP]                                     = mat(a.penalties[:]).reshape(accSP,1)
+            currentPhi[donSP+accSP:donSP+accSP+lengthSP]                      = mat(h.penalties[:]).reshape(lengthSP,1)
+            currentPhi[donSP+accSP+lengthSP:donSP+accSP+lengthSP+mmatrixSP]   = mmatrix[:]
+
+            #pdb.set_trace()
+            if Conf.mode == 'using_quality_scores':
+               totalQualityPenalties = param[-totalQualSP:]
+               currentPhi[donSP+accSP+lengthSP+mmatrixSP:]                    = totalQualityPenalties[:]
+
+            # Calculate w'phi(x,y) the total score of the alignment
+            trueAlignmentScore = (trueWeight.T * currentPhi)[0,0]
+
+            # The allWeights vector is supposed to store the weight parameter
+            # of the true alignment as well as the weight parameters of the
+            # num_path[exampleIdx] other alignments
+            allWeights = zeros((Conf.numFeatures,num_path[exampleIdx]+1))
+            allWeights[:,0] = trueWeight[:,0]
+
+            AlignmentScores = [0.0]*(num_path[exampleIdx]+1)
+            AlignmentScores[0] = trueAlignmentScore
+
+            ################## Calculate wrong alignment(s) ######################
+
+            # Compute donor, acceptor with penalty_lookup_new
+            # returns two double lists
+            donor, acceptor = compute_donacc(don_supp, acc_supp, d, a)
+
+            #myalign wants the acceptor site on the g of the ag
+            acceptor = acceptor[1:]
+            acceptor.append(-inf)
+
+            # for now we don't use donor/acceptor scores
+
+            #donor = [-inf] * len(donor)
+            #acceptor = [-inf] * len(donor)
+
+            dna = str(dna)
+            est = str(est)
+            dna_len = len(dna)
+            est_len = len(est)
+
+            ps = h.convert2SWIG()
+
+            prb = QPalmaDP.createDoubleArrayFromList(quality)
+            chastity = QPalmaDP.createDoubleArrayFromList([.0]*est_len)
+
+            matchmatrix = QPalmaDP.createDoubleArrayFromList(mmatrix.flatten().tolist()[0])
+            mm_len = Conf.sizeMatchmatrix[0]*Conf.sizeMatchmatrix[1]
+
+            d_len = len(donor)
+            donor = QPalmaDP.createDoubleArrayFromList(donor)
+            a_len = len(acceptor)
+            acceptor = QPalmaDP.createDoubleArrayFromList(acceptor)
+
+            # Create the alignment object representing the interface to the C/C++ code.
+            currentAlignment = QPalmaDP.Alignment(Conf.numQualPlifs,Conf.numQualSuppPoints, self.use_quality_scores)
+
+            c_qualityPlifs = QPalmaDP.createPenaltyArrayFromList([elem.convert2SWIG() for elem in qualityPlifs])
+            #print 'Calling myalign...'
+            # calculates SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest
+            currentAlignment.myalign( num_path[exampleIdx], dna, dna_len,\
+             est, est_len, prb, chastity, ps, matchmatrix, mm_len, donor, d_len,\
+             acceptor, a_len, c_qualityPlifs, remove_duplicate_scores,
+             print_matrix)
+
+            #print 'After calling myalign...'
+            #print 'Calling getAlignmentResults...'
+
+            c_SpliceAlign       = QPalmaDP.createIntArrayFromList([0]*(dna_len*num_path[exampleIdx]))
+            c_EstAlign          = QPalmaDP.createIntArrayFromList([0]*(est_len*num_path[exampleIdx]))
+            c_WeightMatch       = QPalmaDP.createIntArrayFromList([0]*(mm_len*num_path[exampleIdx]))
+            c_DPScores   = QPalmaDP.createDoubleArrayFromList([.0]*num_path[exampleIdx])
+
+            c_qualityPlifsFeatures = QPalmaDP.createDoubleArrayFromList([.0]*(Conf.totalQualSuppPoints*num_path[exampleIdx]))
+
+            currentAlignment.getAlignmentResults(c_SpliceAlign, c_EstAlign,\
+            c_WeightMatch, c_DPScores, c_qualityPlifsFeatures)
+
+            #print 'After calling getAlignmentResults...'
+
+            newSpliceAlign = zeros((num_path[exampleIdx]*dna_len,1))
+            newEstAlign    = zeros((est_len*num_path[exampleIdx],1))
+            newWeightMatch = zeros((num_path[exampleIdx]*mm_len,1))
+            newDPScores    = zeros((num_path[exampleIdx],1))
+            newQualityPlifsFeatures = zeros((Conf.totalQualSuppPoints*num_path[exampleIdx],1))
+
+            #print 'newSpliceAlign'
+            for i in range(dna_len*num_path[exampleIdx]):
+               newSpliceAlign[i] = c_SpliceAlign[i]
+            #   print '%f' % (spliceAlign[i])
+
+            #print 'newEstAlign'
+            for i in range(est_len*num_path[exampleIdx]):
+               newEstAlign[i] = c_EstAlign[i]
+            #   print '%f' % (spliceAlign[i])
+
+            #print 'weightMatch'
+            for i in range(mm_len*num_path[exampleIdx]):
+               newWeightMatch[i] = c_WeightMatch[i]
+            #   print '%f' % (weightMatch[i])
+
+            #print 'ViterbiScores'
+            for i in range(num_path[exampleIdx]):
+               newDPScores[i] = c_DPScores[i]
+
+
+            if self.use_quality_scores:
+               for i in range(Conf.totalQualSuppPoints*num_path[exampleIdx]):
+                  newQualityPlifsFeatures[i] = c_qualityPlifsFeatures[i]
+
+            #  equals palma up to here
+
+            #print "Calling destructors"
+            del c_SpliceAlign
+            del c_EstAlign
+            del c_WeightMatch
+            del c_DPScores
+            del c_qualityPlifsFeatures
+            del currentAlignment
+
+            newSpliceAlign = newSpliceAlign.reshape(num_path[exampleIdx],dna_len)
+            newWeightMatch = newWeightMatch.reshape(num_path[exampleIdx],mm_len)
+            # Calculate weights of the respective alignments Note that we are
+            # calculating n-best alignments without hamming loss, so we
+            # have to keep track which of the n-best alignments correspond to
+            # the true one in order not to incorporate a true alignment in the
+            # constraints. To keep track of the true and false alignments we
+            # define an array true_map with a boolean indicating the
+            # equivalence to the true alignment for each decoded alignment.
+            true_map = [0]*(num_path[exampleIdx]+1)
+            true_map[0] = 1
+            path_loss   = [0]*(num_path[exampleIdx])
+
+            for pathNr in range(num_path[exampleIdx]):
+               #print 'decodedWeights' 
+               weightDon, weightAcc, weightIntron = computeSpliceWeights(d, a,\
+               h, newSpliceAlign[pathNr,:].flatten().tolist()[0], don_supp,\
+               acc_supp)
+
+               decodedQualityFeatures = zeros((Conf.totalQualSuppPoints,1))
+               for qidx in range(Conf.totalQualSuppPoints):
+                  decodedQualityFeatures[qidx] = newQualityPlifsFeatures[(pathNr*Conf.totalQualSuppPoints)+qidx]
+
+               #pdb.set_trace()
+
+               path_loss[pathNr] = 0
+               # sum up positionwise loss between alignments
+               for alignPosIdx in range(newSpliceAlign[pathNr,:].shape[1]):
+                  if newSpliceAlign[pathNr,alignPosIdx] != trueSpliceAlign[alignPosIdx]:
+                     path_loss[pathNr] += 1
+
+               #pdb.set_trace()
+
+               # Gewichte in restliche Zeilen der Matrix speichern
+               wp = numpy.vstack([weightIntron, weightDon, weightAcc, newWeightMatch[pathNr,:].T, decodedQualityFeatures])
+               allWeights[:,pathNr+1] = wp
+
+               hpen = mat(h.penalties).reshape(len(h.penalties),1)
+               dpen = mat(d.penalties).reshape(len(d.penalties),1)
+               apen = mat(a.penalties).reshape(len(a.penalties),1)
+               features = numpy.vstack([hpen, dpen, apen, mmatrix[:], totalQualityPenalties])
+
+               AlignmentScores[pathNr+1] = (allWeights[:,pathNr+1].T * features)[0,0]
+
+               # Check wether scalar product + loss equals viterbi score
+               #print 'Example nr.: %d, path nr. %d, scores: %f vs %f' % (exampleIdx,pathNr,newDPScores[pathNr,0], AlignmentScores[pathNr+1])
+
+               distinct_scores = False
+               if math.fabs(AlignmentScores[pathNr] - AlignmentScores[pathNr+1]) > 1e-5:
+                  distinct_scores = True
+               
+               #if not math.fabs(newDPScores[pathNr,0] - AlignmentScores[pathNr+1]) + [0,1][distinct_scores and (pathNr>0)] <= 1e-5:
+               if not math.fabs(newDPScores[pathNr,0] - AlignmentScores[pathNr+1]) <= 1e-5:
+                  pdb.set_trace()
+
+               #  # if the pathNr-best alignment is very close to the true alignment consider it as true
+               if norm( allWeights[:,0] - allWeights[:,pathNr+1] ) < 1e-5:
+                  true_map[pathNr+1] = 1
+
+               # assert AlignmentScores[0] > max(AlignmentScores[1:]) + 1e-6, pdb.set_trace()
+
+               # the true label sequence should not have a larger score than the maximal one WHYYYYY?
+               # this means that all n-best paths are to close to each other 
+               # we have to extend the n-best search to a (n+1)-best
+               if len([elem for elem in true_map if elem == 1]) == len(true_map):
+                  num_path[exampleIdx] = num_path[exampleIdx]+1
+
+               # Choose true and first false alignment for extending A
+               firstFalseIdx = -1
+               for map_idx,elem in enumerate(true_map):
+                  if elem == 0:
+                     firstFalseIdx = map_idx
+                     break
+
+               # if there is at least one useful false alignment add the
+               # corresponding constraints to the optimization problem
+               if firstFalseIdx != -1:
+                  trueWeights       = allWeights[:,0]
+                  firstFalseWeights = allWeights[:,firstFalseIdx]
+                  differenceVector  = trueWeights - firstFalseWeights
+                  #pdb.set_trace()
+
+                  if Conf.USE_OPT:
+                     const_added = solver.addConstraint(differenceVector, exampleIdx)
+                     const_added_ctr += 1
+               #
+               # end of one example processing 
+               #
+
+            # call solver every nth example //added constraint
+            if exampleIdx != 0 and exampleIdx % 100 == 0 and Conf.USE_OPT:
+               objValue,w,self.slacks = solver.solve()
+      
+               print "objValue is %f" % objValue
+
+               sum_xis = 0
+               for elem in self.slacks:
+                  sum_xis +=  elem
+   
+               for i in range(len(param)):
+                  param[i] = w[i]
+
+               #pdb.set_trace()
+               cPickle.dump(param,open('param_%d.pickle'%param_idx,'w+'))
+               param_idx += 1
+               [h,d,a,mmatrix,qualityPlifs] = set_param_palma(param,self.ARGS.train_with_intronlengthinformation)
+
+         #
+         # end of one iteration through all examples
+         #
+         iteration_nr += 1
+
+      #
+      # end of optimization 
+      #  
+      print 'Training completed'
+
+      pa = para()
+      pa.h = h
+      pa.d = d
+      pa.a = a
+      pa.mmatrix = mmatrix
+      pa.qualityPlifs = qualityPlifs
+
+      cPickle.dump(param,open('param_%d.pickle'%param_idx,'w+'))
+      #cPickle.dump(pa,open('elegans.param','w+'))
+      self.logfh.close()
+
+
+
+   def predict(self):
+      beg = Conf.prediction_begin
+      end = Conf.prediction_end
+
+      Sequences   = self.Sequences[beg:end]
+      Exons       = self.Exons[beg:end]
+      Ests        = self.Ests[beg:end]
+      Qualities   = self.Qualities[beg:end]
+      Acceptors   = self.Acceptors[beg:end]
+      Donors      = self.Donors[beg:end]
+
+      remove_duplicate_scores = Conf.remove_duplicate_scores 
+      print_matrix            = Conf.print_matrix 
+      anzpath                 = Conf.anzpath 
+
+      param_filename='/fml/ag-raetsch/home/fabio/svn/projects/QPalma/scripts/param_28.pickle'
+      param = load_param(param_filename)
+
+      # Set the parameters such as limits penalties for the Plifs
+      [h,d,a,mmatrix,qualityPlifs] = set_param_palma(param,self.ARGS.train_with_intronlengthinformation)
+
+      #############################################################################################
+      # Prediction
+      #############################################################################################
+      self.plog('Starting prediction...\n')
+
+      donSP       = Conf.numDonSuppPoints
+      accSP       = Conf.numAccSuppPoints
+      lengthSP    = Conf.numLengthSuppPoints
+      mmatrixSP   = Conf.sizeMatchmatrix[0]\
+      *Conf.sizeMatchmatrix[1]
+      numq        = Conf.numQualSuppPoints
+      totalQualSP = Conf.totalQualSuppPoints
+
+      currentPhi = zeros((Conf.numFeatures,1))
+      totalQualityPenalties = zeros((totalQualSP,1))
+
+      total_up_off = []
+      total_down_off = []
+
+      for exampleIdx in range(self.numExamples):
+         dna = Sequences[exampleIdx] 
+         est = Ests[exampleIdx] 
+         currentSplitPos = SplitPos[exampleIdx]
+
+         if Conf.mode == 'normal':
+            quality = [40]*len(est)
+
+         if Conf.mode == 'using_quality_scores':
+            quality = Qualities[exampleIdx]
+
+         exons = Exons[exampleIdx] 
+         # NoiseMatrix = Noises[exampleIdx] 
+         don_supp = Donors[exampleIdx] 
+         acc_supp = Acceptors[exampleIdx] 
+
+         if exons[-1,1] > len(dna):
+            continue
+
+         # Berechne die Parameter des wirklichen Alignments (but with untrained d,a,h ...)    
+         trueSpliceAlign, trueWeightMatch, trueWeightQuality = computeSpliceAlignWithQuality(dna, exons)
+         
+         # Calculate the weights
+         trueWeightDon, trueWeightAcc, trueWeightIntron = computeSpliceWeights(d, a, h, trueSpliceAlign, don_supp, acc_supp)
+         trueWeight = numpy.vstack([trueWeightIntron, trueWeightDon, trueWeightAcc, trueWeightMatch, trueWeightQuality])
+
+         currentPhi[0:donSP]                                               = mat(d.penalties[:]).reshape(donSP,1)
+         currentPhi[donSP:donSP+accSP]                                     = mat(a.penalties[:]).reshape(accSP,1)
+         currentPhi[donSP+accSP:donSP+accSP+lengthSP]                      = mat(h.penalties[:]).reshape(lengthSP,1)
+         currentPhi[donSP+accSP+lengthSP:donSP+accSP+lengthSP+mmatrixSP]   = mmatrix[:]
+
+         if Conf.mode == 'using_quality_scores':
+            totalQualityPenalties = param[-totalQualSP:]
+            currentPhi[donSP+accSP+lengthSP+mmatrixSP:]                    = totalQualityPenalties[:]
+
+         # Calculate w'phi(x,y) the total score of the alignment
+         trueAlignmentScore = (trueWeight.T * currentPhi)[0,0]
+
+         # The allWeights vector is supposed to store the weight parameter
+         # of the true alignment as well as the weight parameters of the
+         # 1 other alignments
+         allWeights = zeros((Conf.numFeatures,1+1))
+         allWeights[:,0] = trueWeight[:,0]
+
+         AlignmentScores = [0.0]*(1+1)
+         AlignmentScores[0] = trueAlignmentScore
+
+         ################## Calculate wrong alignment(s) ######################
+
+         # Compute donor, acceptor with penalty_lookup_new
+         # returns two double lists
+         donor, acceptor = compute_donacc(don_supp, acc_supp, d, a)
+
+         #myalign wants the acceptor site on the g of the ag
+         acceptor = acceptor[1:]
+         acceptor.append(-inf)
+
+         dna = str(dna)
+         est = str(est)
+         dna_len = len(dna)
+         est_len = len(est)
+
+         ps = h.convert2SWIG()
+
+         prb = QPalmaDP.createDoubleArrayFromList(quality)
+         chastity = QPalmaDP.createDoubleArrayFromList([.0]*est_len)
+
+         matchmatrix = QPalmaDP.createDoubleArrayFromList(mmatrix.flatten().tolist()[0])
+         mm_len = Conf.sizeMatchmatrix[0]*Conf.sizeMatchmatrix[1]
+
+         d_len = len(donor)
+         donor = QPalmaDP.createDoubleArrayFromList(donor)
+         a_len = len(acceptor)
+         acceptor = QPalmaDP.createDoubleArrayFromList(acceptor)
+
+         # Create the alignment object representing the interface to the C/C++ code.
+         currentAlignment = QPalmaDP.Alignment(Conf.numQualPlifs,Conf.numQualSuppPoints, self.use_quality_scores)
+
+         c_qualityPlifs = QPalmaDP.createPenaltyArrayFromList([elem.convert2SWIG() for elem in qualityPlifs])
+
+         # calculates SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest
+         currentAlignment.myalign(1, dna, dna_len,\
+          est, est_len, prb, chastity, ps, matchmatrix, mm_len, donor, d_len,\
+          acceptor, a_len, c_qualityPlifs, remove_duplicate_scores,
+          print_matrix)
+
+         c_SpliceAlign  = QPalmaDP.createIntArrayFromList([0]*(dna_len*1))
+         c_EstAlign     = QPalmaDP.createIntArrayFromList([0]*(est_len*1))
+         c_WeightMatch  = QPalmaDP.createIntArrayFromList([0]*(mm_len*1))
+         c_DPScores     = QPalmaDP.createDoubleArrayFromList([.0]*1)
+
+         c_qualityPlifsFeatures = QPalmaDP.createDoubleArrayFromList([.0]*(Conf.totalQualSuppPoints))
+
+         currentAlignment.getAlignmentResults(c_SpliceAlign, c_EstAlign,\
+         c_WeightMatch, c_DPScores, c_qualityPlifsFeatures)
+
+         newSpliceAlign = zeros((dna_len,1))
+         newEstAlign    = zeros((est_len,1))
+         newWeightMatch = zeros((mm_len,1))
+         newQualityPlifsFeatures = zeros((Conf.totalQualSuppPoints,1))
+
+         #print 'newSpliceAlign'
+         for i in range(dna_len):
+            newSpliceAlign[i] = c_SpliceAlign[i]
+         #   print '%f' % (spliceAlign[i])
+
+         #print 'newEstAlign'
+         for i in range(est_len):
+            newEstAlign[i] = c_EstAlign[i]
+         #   print '%f' % (spliceAlign[i])
+
+         #print 'weightMatch'
+         for i in range(mm_len):
+            newWeightMatch[i] = c_WeightMatch[i]
+         #   print '%f' % (weightMatch[i])
+
+         newDPScores = c_DPScores[0]
+
+         for i in range(Conf.totalQualSuppPoints):
+            newQualityPlifsFeatures[i] = c_qualityPlifsFeatures[i]
+
+         #print "Calling destructors"
+         del c_SpliceAlign
+         del c_EstAlign
+         del c_WeightMatch
+         del c_DPScores
+         del c_qualityPlifsFeatures
+         del currentAlignment
+
+         newSpliceAlign = newSpliceAlign.reshape(1,dna_len)
+         newWeightMatch = newWeightMatch.reshape(1,mm_len)
+         # Calculate weights of the respective alignments Note that we are
+         # calculating n-best alignments without hamming loss, so we
+         # have to keep track which of the n-best alignments correspond to
+         # the true one in order not to incorporate a true alignment in the
+         # constraints. To keep track of the true and false alignments we
+         # define an array true_map with a boolean indicating the
+         # equivalence to the true alignment for each decoded alignment.
+         true_map = [0]*2
+         true_map[0] = 1
+         pathNr = 0
+
+         weightDon, weightAcc, weightIntron = computeSpliceWeights(d, a, h, newSpliceAlign.flatten().tolist()[0], don_supp, acc_supp)
+
+         decodedQualityFeatures = zeros((Conf.totalQualSuppPoints,1))
+         for qidx in range(Conf.totalQualSuppPoints):
+            decodedQualityFeatures[qidx] = newQualityPlifsFeatures[qidx]
+
+         # Gewichte in restliche Zeilen der Matrix speichern
+         wp = numpy.vstack([weightIntron, weightDon, weightAcc, newWeightMatch.T, decodedQualityFeatures])
+         allWeights[:,pathNr+1] = wp
+
+         hpen = mat(h.penalties).reshape(len(h.penalties),1)
+         dpen = mat(d.penalties).reshape(len(d.penalties),1)
+         apen = mat(a.penalties).reshape(len(a.penalties),1)
+         features = numpy.vstack([hpen, dpen, apen, mmatrix[:], totalQualityPenalties])
+         AlignmentScores[pathNr+1] = (allWeights[:,pathNr+1].T * features)[0,0]
+
+         # if the pathNr-best alignment is very close to the true alignment consider it as true
+         if norm( allWeights[:,0] - allWeights[:,pathNr+1] ) < 1e-5:
+            true_map[pathNr+1] = 1
+
+         up_off,down_off = evaluateExample(dna,est,exons,newSpliceAlign,newEstAlign,currentSplitPos)
+         #evaluateExample(dna,est,exons,newSpliceAlign,newEstAlign,currentSplitPos)
+         #print up_off,down_off 
+
+         if up_off > -1:
+            total_up_off.append(up_off)
+            total_down_off.append(down_off)
+
+      total_up = 0
+      total_down = 0 
+      for idx in range(len(total_up_off)):
+         total_up    += total_up_off[idx]
+         total_down  += total_down_off[idx]
+         
+      total_up /= len(total_up_off)
+      total_down /= len(total_down_off)
+
+      print 'Mean up_off is %f' % total_up
+      print 'Mean down_off is %f' % total_down
+      print 'Correct up_off len is %d' % len([elem for elem in total_up_off if elem in [0,1]])
+      print 'Correct down_off len is %d' % len([elem for elem in total_down_off if elem in [0,1]])
+
+      print 'Prediction completed'
+      self.logfh.close()
+
+def fetch_genome_info(ginfo_filename):
+   if not os.path.exists(ginfo_filename):
+      cmd = ['']*4
+      cmd[0] = 'addpath /fml/ag-raetsch/home/fabio/svn/tools/utils'
+      cmd[1] = 'addpath /fml/ag-raetsch/home/fabio/svn/tools/genomes'
+      cmd[2] = 'genome_info = init_genome(\'%s\')' % gen_file
+      cmd[3] = 'save genome_info.mat genome_info'  
+      full_cmd = "matlab -nojvm -nodisplay -r \"%s; %s; %s; %s; exit\"" % (cmd[0],cmd[1],cmd[2],cmd[3])
+
+      obj = subprocess.Popen(full_cmd,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
+      out,err = obj.communicate()
+      assert err == '', 'An error occured!\n%s'%err
+
+      ginfo = scipy.io.loadmat('genome_info.mat')
+      cPickle.dump(self.genome_info,open(ginfo_filename,'w+'))
+      return ginfo['genome_info']
+
+   else:
+      return cPickle.load(open(ginfo_filename))
+
+def plifs2param(h,d,a,mmatrix,qualityPlifs):
+   donSP       = Conf.numDonSuppPoints
+   accSP       = Conf.numAccSuppPoints
+   lengthSP    = Conf.numLengthSuppPoints
+   mmatrixSP   = Conf.sizeMatchmatrix[0]\
+   *Conf.sizeMatchmatrix[1]
+
+
+   param = zeros((Conf.numFeatures,1))
+   param[0:donSP]                                               = mat(d.penalties[:]).reshape(donSP,1)
+   param[donSP:donSP+accSP]                                     = mat(a.penalties[:]).reshape(accSP,1)
+   param[donSP+accSP:donSP+accSP+lengthSP]                      = mat(h.penalties[:]).reshape(lengthSP,1)
+   param[donSP+accSP+lengthSP:donSP+accSP+lengthSP+mmatrixSP]   = mmatrix[:]
+
+   for idx in range(len(qualityPlifs)):
+      currentPlif       = qualityPlifs[idx]
+      begin             = lengthSP+donSP+accSP+mmatrixSP+(idx*Conf.numQualSuppPoints)
+      end               = lengthSP+donSP+accSP+mmatrixSP+((idx+1)*Conf.numQualSuppPoints)
+      param[begin:end]  = mat(currentPlif.penalties).reshape(Conf.numQualSuppPoints,1)
+
+   return param
+
+def load_param(filename):
+   param = None 
+   #try:
+   #   para = cPickle.load(open(filename))
+   #except:
+   #   print 'Error: Could not open parameter file!'
+   #   sys.exit(1)
+   #
+   #param = plifs2param(para.h,para.d,para.a,para.mmatrix,para.qualityPlifs)
+
+   param = cPickle.load(open(filename))
+   return param
+
+def evaluateExample(dna,est,exons,SpliceAlign,newEstAlign,spos):
+   newExons = []
+   oldElem = -1
+   SpliceAlign = SpliceAlign.flatten().tolist()[0]
+   SpliceAlign.append(-1)
+   for pos,elem in enumerate(SpliceAlign):
+      if pos == 0:
+         oldElem = -1
+      else:
+         oldElem = SpliceAlign[pos-1]
+
+      if oldElem != 0 and elem == 0: # start of exon
+         newExons.append(pos-1)
+
+      if oldElem == 0 and elem != 0: # end of exon
+         newExons.append(pos)
+
+   up_off   = -1
+   down_off = -1
+
+   if len(newExons) != 4:
+      acc = 0.0
+   else:
+      e1_begin,e1_end = newExons[0],newExons[1]
+      e2_begin,e2_end = newExons[2],newExons[3]
+
+      up_off   = int(math.fabs(e1_end - exons[0,1]))
+      down_off = int(math.fabs(e2_begin - exons[1,0]))
+
+   return up_off,down_off
+
+
+if __name__ == '__main__':
+
+   qpalma = QPalma()
+   if len(sys.argv) != 2:
+      print 'You have to choose between training or prediction mode:'
+      print 'python qpalma. py (train|predict)' 
+
+   mode = sys.argv[1]
+   assert mode in ['train','predict']
+
+   if mode == 'train':
+      qpalma.train()
+      
+   if mode == 'predict':
+      qpalma.predict()
+