+ renamed dyn_prog directory
authorFabio <fabio@congo.fml.local>
Tue, 7 Oct 2008 09:40:25 +0000 (11:40 +0200)
committerFabio <fabio@congo.fml.local>
Tue, 7 Oct 2008 09:40:25 +0000 (11:40 +0200)
+ extended setup script and MANIFEST to work with C++ code
+ extended README

45 files changed:
DynProg/Makefile [new file with mode: 0644]
DynProg/Mathmatics.cpp [new file with mode: 0644]
DynProg/Mathmatics.h [new file with mode: 0644]
DynProg/QPalmaDP.i [new file with mode: 0644]
DynProg/common.h [new file with mode: 0644]
DynProg/config.h [new file with mode: 0644]
DynProg/debug_tools.cpp [new file with mode: 0644]
DynProg/debug_tools.h [new file with mode: 0644]
DynProg/fill_matrix.cpp [new file with mode: 0644]
DynProg/fill_matrix.h [new file with mode: 0644]
DynProg/io.cpp [new file with mode: 0644]
DynProg/io.h [new file with mode: 0644]
DynProg/penalty_info.cpp [new file with mode: 0644]
DynProg/penalty_info.h [new file with mode: 0644]
DynProg/print_align.cpp [new file with mode: 0644]
DynProg/qpalma_dp.cpp [new file with mode: 0644]
DynProg/qpalma_dp.h [new file with mode: 0644]
DynProg/result_align.cpp [new file with mode: 0644]
DynProg/test_fill_matrix.cpp [new file with mode: 0644]
MANIFEST.in [new file with mode: 0644]
README [new file with mode: 0644]
doc/qpalma.tex
dyn_prog/Makefile [deleted file]
dyn_prog/Mathmatics.cpp [deleted file]
dyn_prog/Mathmatics.h [deleted file]
dyn_prog/QPalmaDP.i [deleted file]
dyn_prog/common.h [deleted file]
dyn_prog/config.h [deleted file]
dyn_prog/debug_tools.cpp [deleted file]
dyn_prog/debug_tools.h [deleted file]
dyn_prog/fill_matrix.cpp [deleted file]
dyn_prog/fill_matrix.h [deleted file]
dyn_prog/io.cpp [deleted file]
dyn_prog/io.h [deleted file]
dyn_prog/penalty_info.cpp [deleted file]
dyn_prog/penalty_info.h [deleted file]
dyn_prog/print_align.cpp [deleted file]
dyn_prog/qpalma_dp.cpp [deleted file]
dyn_prog/qpalma_dp.h [deleted file]
dyn_prog/result_align.cpp [deleted file]
dyn_prog/test_fill_matrix.cpp [deleted file]
qpalma/Plif.py
qpalma/sequence_utils.py
scripts/__init__.py [new file with mode: 0644]
setup.py

diff --git a/DynProg/Makefile b/DynProg/Makefile
new file mode 100644 (file)
index 0000000..9232ee2
--- /dev/null
@@ -0,0 +1,38 @@
+SRCS= Mathmatics.cpp\
+               fill_matrix.cpp\
+               qpalma_dp.cpp\
+               result_align.cpp\
+               debug_tools.cpp\
+               penalty_info.cpp\
+               print_align.cpp\
+               io.cpp
+
+HDRS= Mathmatics.h\
+               common.h\
+               config.h\
+               debug_tools.h\
+               fill_matrix.h\
+               io.h\
+               penalty_info.h\
+               qpalma_dp.h
+
+OBJS = $(SRCS:%.cpp=%.o)
+
+#CXXFLAGS=-O3 -fPIC -g -ggdb 
+CXXFLAGS=-Wall -std=c++98 -ggdb -O3 -fPIC -I/usr/include/python2.5
+LDFLAGS=-lprofiler -L/fml/ag-raetsch/home/fabio/own_libs/libunwind/lib -lunwind-x86_64 -lunwind
+
+
+IF=QPalmaDP
+
+all: $(OBJS) $(HDRS)
+       @ swig -c++ -python ${IF}.i
+       @ g++ $(CXXFLAGS) -I/usr/include/python2.5 -c ${IF}_wrap.cxx -o ${IF}_wrap.o
+       @ g++ $(CXXFLAGS) -shared -lpython2.5 $(OBJS) ${IF}_wrap.o -o _${IF}.so
+       @ python -c "import ${IF}"
+
+test: $(OBJS) $(HDRS) 
+       g++ $(CXXFLAGS) $(LDFLAGS) -o test_fm  debug_tools.o Mathmatics.o io.o penalty_info.o fill_matrix.o test_fill_matrix.cpp 
+
+clean:
+       @ rm *.o *.so *.cxx ${IF}.py ${IF}.pyc
diff --git a/DynProg/Mathmatics.cpp b/DynProg/Mathmatics.cpp
new file mode 100644 (file)
index 0000000..61298da
--- /dev/null
@@ -0,0 +1,349 @@
+// Math.cpp: implementation of the CMath class.
+//
+//////////////////////////////////////////////////////////////////////
+
+
+#include "Mathmatics.h"
+#include "io.h"
+
+#include <sys/time.h>
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <time.h>
+#include <unistd.h>
+#include <assert.h>
+
+//////////////////////////////////////////////////////////////////////
+// Construction/Destruction
+//////////////////////////////////////////////////////////////////////
+
+#ifdef USE_LOGCACHE
+//gene/math specific constants
+#ifdef USE_HMMDEBUG
+#define MAX_LOG_TABLE_SIZE 10*1024*1024
+#define LOG_TABLE_PRECISION 1e-6
+#else
+#define MAX_LOG_TABLE_SIZE 123*1024*1024
+#define LOG_TABLE_PRECISION 1e-15
+#endif
+
+INT CMath::LOGACCURACY         = 0; // 100000 steps per integer
+#endif
+
+INT CMath::LOGRANGE            = 0; // range for logtable: log(1+exp(x))  -25 <= x <= 0
+
+#ifdef USE_PATHDEBUG
+const REAL CMath::INFTY            =  1e11;            // infinity
+#else
+const REAL CMath::INFTY            =  -log(0.0);       // infinity
+#endif
+const REAL CMath::ALMOST_NEG_INFTY =  -1000;   
+
+CHAR CMath::rand_state[256];
+
+CMath::CMath()
+{
+       struct timeval tv;
+       gettimeofday(&tv, NULL);
+       UINT seed=(UINT) (4223517*getpid()*tv.tv_sec*tv.tv_usec);
+       initstate(seed, CMath::rand_state, sizeof(CMath::rand_state));
+       CIO::message(M_INFO, "seeding random number generator with %u\n", seed);
+
+#ifdef USE_LOGCACHE
+    LOGRANGE=CMath::determine_logrange();
+    LOGACCURACY=CMath::determine_logaccuracy(LOGRANGE);
+    CIO::message(M_INFO, "Initializing log-table (size=%i*%i*%i=%2.1fMB) ...",LOGRANGE,LOGACCURACY,sizeof(REAL),LOGRANGE*LOGACCURACY*sizeof(REAL)/(1024.0*1024.0)) ;
+   
+    CMath::logtable=new REAL[LOGRANGE*LOGACCURACY];
+    init_log_table();
+    CIO::message(M_INFO, "Done.\n") ;
+#else
+       INT i=0;
+       while ((REAL)log(1+((REAL)exp(-REAL(i)))))
+               i++;
+    CIO::message(M_INFO, "determined range for x in log(1+exp(-x)) is:%d\n", i);
+       LOGRANGE=i;
+#endif 
+}
+
+CMath::~CMath()
+{
+#ifdef USE_LOGCACHE
+       delete[] logtable;
+#endif
+}
+
+#ifdef USE_LOGCACHE
+INT CMath::determine_logrange()
+{
+    INT i;
+    REAL acc=0;
+    for (i=0; i<50; i++)
+    {
+       acc=((REAL)log(1+((REAL)exp(-REAL(i)))));
+       if (acc<=(REAL)LOG_TABLE_PRECISION)
+           break;
+    }
+
+    CIO::message(M_INFO, "determined range for x in table log(1+exp(-x)) is:%d (error:%G)\n",i,acc);
+    return i;
+}
+
+INT CMath::determine_logaccuracy(INT range)
+{
+    range=MAX_LOG_TABLE_SIZE/range/((int)sizeof(REAL));
+    CIO::message(M_INFO, "determined accuracy for x in table log(1+exp(-x)) is:%d (error:%G)\n",range,1.0/(double) range);
+    return range;
+}
+
+//init log table of form log(1+exp(x))
+void CMath::init_log_table()
+{
+  for (INT i=0; i< LOGACCURACY*LOGRANGE; i++)
+    logtable[i]=log(1+exp(REAL(-i)/REAL(LOGACCURACY)));
+}
+#endif
+
+void CMath::sort(INT *a, INT cols, INT sort_col)
+{
+  INT changed=1;
+  if (a[0]==-1) return ;
+  while (changed)
+  {
+      changed=0; INT i=0 ;
+      while ((a[(i+1)*cols]!=-1) && (a[(i+1)*cols+1]!=-1)) // to be sure
+         {
+                 if (a[i*cols+sort_col]>a[(i+1)*cols+sort_col])
+                 {
+                         for (INT j=0; j<cols; j++)
+                                 CMath::swap(a[i*cols+j],a[(i+1)*cols+j]) ;
+                         changed=1 ;
+                 } ;
+                 i++ ;
+         } ;
+  } ;
+} 
+
+void CMath::sort(REAL *a, INT* idx, INT N) 
+{
+
+       INT changed=1;
+       while (changed)
+       {
+               changed=0;
+               for (INT i=0; i<N-1; i++)
+               {
+                       if (a[i]>a[i+1])
+                       {
+                               swap(a[i],a[i+1]) ;
+                               swap(idx[i],idx[i+1]) ;
+                               changed=1 ;
+                       } ;
+               } ;
+       } ;
+        
+} 
+
+
+
+//plot x- axis false positives (fp) 1-Specificity
+//plot y- axis true positives (tp) Sensitivity
+INT CMath::calcroc(REAL* fp, REAL* tp, REAL* output, INT* label, INT& size, INT& possize, INT& negsize, REAL& tresh, FILE* rocfile)
+{
+       INT left=0;
+       INT right=size-1;
+       INT i;
+
+       for (i=0; i<size; i++)
+       {
+               if (!(label[i]== -1 || label[i]==1))
+                       return -1;
+       }
+
+       //sort data such that -1 labels first +1 behind
+       while (left<right)
+       {
+               while ((label[left] < 0) && (left<right))
+                       left++;
+               while ((label[right] > 0) && (left<right))      //warning: label must be either -1 or +1
+                       right--;
+
+               swap(output[left],output[right]);
+               swap(label[left], label[right]);
+       }
+
+       // neg/pos sizes
+       negsize=left;
+       possize=size-left;
+       REAL* negout=output;
+       REAL* posout=&output[left];
+
+       // sort the pos and neg outputs separately
+       qsort(negout, negsize);
+       qsort(posout, possize);
+
+       // set minimum+maximum values for decision-treshhold
+       REAL minimum=min(negout[0], posout[0]);
+       REAL maximum=minimum;
+       if (negsize>0)
+               maximum=max(maximum,negout[negsize-1]);
+       if (possize>0)
+               maximum=max(maximum,posout[possize-1]);
+
+       REAL treshhold=minimum;
+       REAL old_treshhold=treshhold;
+
+       //clear array.
+       for (i=0; i<size; i++)
+       {
+               fp[i]=1.0;
+               tp[i]=1.0;
+       }
+
+       //start with fp=1.0 tp=1.0 which is posidx=0, negidx=0
+       //everything right of {pos,neg}idx is considered to beLONG to +1
+       INT posidx=0;
+       INT negidx=0;
+       INT iteration=1;
+       INT returnidx=-1;
+
+       REAL minerr=10;
+
+       while (iteration < size && treshhold<=maximum)
+       {
+               old_treshhold=treshhold;
+
+               while (treshhold==old_treshhold && treshhold<=maximum)
+               {
+                       if (posidx<possize && negidx<negsize)
+                       {
+                               if (posout[posidx]<negout[negidx])
+                               {
+                                       if (posout[posidx]==treshhold)
+                                               posidx++;
+                                       else
+                                               treshhold=posout[posidx];
+                               }
+                               else
+                               {
+                                       if (negout[negidx]==treshhold)
+                                               negidx++;
+                                       else
+                                               treshhold=negout[negidx];
+                               }
+                       }
+                       else
+                       {
+                               if (posidx>=possize && negidx<negsize-1)
+                               {
+                                       negidx++;
+                                       treshhold=negout[negidx];
+                               }
+                               else if (negidx>=negsize && posidx<possize-1)
+                               {
+                                       posidx++;
+                                       treshhold=posout[posidx];
+                               }
+                               else if (negidx<negsize && treshhold!=negout[negidx])
+                                       treshhold=negout[negidx];
+                               else if (posidx<possize && treshhold!=posout[posidx])
+                                       treshhold=posout[posidx];
+                               else
+                               {
+                                       treshhold=2*(maximum+100); // force termination
+                                       posidx=possize;
+                                       negidx=negsize;
+                                       break;
+                               }
+                       }
+               }
+
+               //calc tp,fp
+               tp[iteration]=(possize-posidx)/(REAL (possize));
+               fp[iteration]=(negsize-negidx)/(REAL (negsize));
+
+               //choose poINT with minimal error
+               if (minerr > negsize*fp[iteration]/size+(1-tp[iteration])*possize/size )
+               {
+                       minerr=negsize*fp[iteration]/size+(1-tp[iteration])*possize/size;
+                       tresh=(old_treshhold+treshhold)/2;
+                       returnidx=iteration;
+               }
+
+               iteration++;
+       }
+
+       // set new size
+       size=iteration;
+
+       if (rocfile)
+       {
+               const CHAR id[]="ROC";
+               fwrite(id, sizeof(char), sizeof(id), rocfile);
+               fwrite(fp, sizeof(REAL), size, rocfile);
+               fwrite(tp, sizeof(REAL), size, rocfile);
+       }
+
+       return returnidx;
+}
+
+UINT CMath::crc32(BYTE *data, INT len)
+{
+    UINT        result;
+    INT                 i,j;
+    BYTE       octet;
+
+    result = 0-1;
+
+    for (i=0; i<len; i++)
+    {
+       octet = *(data++);
+       for (j=0; j<8; j++)
+       {
+           if ((octet >> 7) ^ (result >> 31))
+           {
+               result = (result << 1) ^ 0x04c11db7;
+           }
+           else
+           {
+               result = (result << 1);
+           }
+           octet <<= 1;
+       }
+    }
+
+    return ~result; 
+}
+
+double CMath::mutual_info(REAL* p1, REAL* p2, INT len)
+{
+       double e=0;
+
+       for (INT i=0; i<len; i++)
+               for (INT j=0; j<len; j++)
+                       e+=exp(p2[j*len+i])*(p2[j*len+i]-p1[i]-p1[j]);
+
+       return e;
+}
+
+double CMath::relative_entropy(REAL* p, REAL* q, INT len)
+{
+       double e=0;
+
+       for (INT i=0; i<len; i++)
+               e+=exp(p[i])*(p[i]-q[i]);
+
+       return e;
+}
+
+double CMath::entropy(REAL* p, INT len)
+{
+       double e=0;
+
+       for (INT i=0; i<len; i++)
+               e-=exp(p[i])*p[i];
+
+       return e;
+}
diff --git a/DynProg/Mathmatics.h b/DynProg/Mathmatics.h
new file mode 100644 (file)
index 0000000..2e24d48
--- /dev/null
@@ -0,0 +1,546 @@
+#ifndef __MATHMATICS_H_
+#define __MATHMATICS_H_
+
+#include "common.h"
+#include "io.h"
+
+#include <math.h>
+#include <stdio.h>
+
+//define finite for win32
+#ifdef _WIN32
+#include <float.h>
+#define finite _finite
+#define isnan _isnan
+#endif
+
+#ifndef NAN
+#include <stdlib.h>
+#define NAN (strtod("NAN",NULL))
+#endif
+
+#ifdef SUNOS
+extern "C" int finite(double);
+#endif
+
+/** Mathematical Functions.
+ * Class which collects generic mathematical functions
+ */
+class CMath  
+{
+public:
+       /**@name Constructor/Destructor.
+        */
+       //@{
+       ///Constructor - initializes log-table
+       CMath();
+
+       ///Destructor - frees logtable
+       virtual ~CMath();
+       //@}
+
+       /**@name min/max/abs functions.
+       */
+       //@{
+
+       ///return the minimum of two integers
+       //
+       template <class T>
+       static inline T min(T a, T b)
+       {
+               return ((a)<=(b))?(a):(b);
+       }
+
+       ///return the maximum of two integers
+       template <class T>
+       static inline T max(T a, T b) 
+       {
+               return ((a)>=(b))?(a):(b);
+       }
+
+       ///return the maximum of two integers
+       template <class T>
+       static inline T abs(T a)
+       {
+               return ((a)>=(0))?(a):(-a);
+       }
+       //@}
+
+       /**@name misc functions */
+       //@{
+
+       /// crc32
+       static UINT crc32(BYTE *data, INT len);
+
+       static inline REAL round(REAL d)
+       {
+               return floor(d+0.5);
+       }
+
+       /// signum of type T variable a 
+       template <class T>
+       static inline REAL sign(REAL a)
+       {
+               if (a==0)
+                       return 0;
+               else return (a<0) ? (-1) : (+1);
+       }
+
+       /// swap floats a and b
+       template <class T>
+       static inline void swap(T & a,T &b)
+       {
+               T c=a ;
+               a=b; b=c ;
+       }
+
+       /// x^2
+       template <class T>
+       static inline T sq(T x)
+       {
+               return x*x;
+       }
+
+       static inline LONG factorial(INT n)
+       {
+               LONG res=1 ;
+               for (int i=2; i<=n; i++)
+                       res*=i ;
+               return res ;
+       }
+
+       static inline LONG nchoosek(INT n, INT k)
+       {
+               long res=1 ;
+
+               for (INT i=n-k+1; i<=n; i++)
+                       res*=i ;
+
+               return res/factorial(k) ;
+       }
+
+       /** performs a bubblesort on a given matrix a.
+        * it is sorted from in ascending order from top to bottom
+        * and left to right */
+       static void sort(INT *a, INT cols, INT sort_col=0) ;
+       static void sort(REAL *a, INT*idx, INT N) ;
+       
+       /** performs a quicksort on an array output of length size
+        * it is sorted from in ascending (for type T) */
+       template <class T>
+       static void qsort(T* output, INT size) ;
+
+       /** performs a quicksort on an array output of length size
+        * it is sorted from in ascending order 
+        * (for type T1) and returns the index (type T2)
+        * matlab alike [sorted,index]=sort(output) 
+        */
+       template <class T1,class T2>
+       static void qsort(T1* output, T2* index, INT size);
+
+       /** performs a quicksort on an array output of length size
+        * it is sorted from in ascending order
+        * (for type T1) and returns the index (type T2)
+        * matlab alike [sorted,index]=sort(output) 
+        */
+       template <class T1,class T2>
+       static void qsort_backward(T1* output, T2* index, INT size);
+
+     /* finds the smallest element in output and puts that element as the 
+               first element  */
+       template <class T>
+       static void min(REAL* output, T* index, INT size);
+
+     /* finds the n smallest elements in output and puts these elements as the 
+               first n elements  */
+       template <class T>
+       static void nmin(REAL* output, T* index, INT size, INT n);
+
+       /* performs a inplace unique of a vector of type T using quicksort 
+        * returns the new number of elements */
+       template <class T>
+       static INT unique(T* output, INT size) 
+       {
+               qsort(output, size) ;
+               INT i,j=0 ;
+               for (i=0; i<size; i++)
+                       if (i==0 || output[i]!=output[i-1])
+                               output[j++]=output[i] ;
+               return j ;
+       }
+
+       /* finds an element in a sorted array via binary search
+     * returns -1 if not found */
+       static inline INT fast_find(WORD* output, INT size, WORD elem)
+       {
+               INT start=0, end=size-1, middle=size/2 ;
+
+               if (output[start]>elem || output[end]<elem)
+                       return -1 ;
+
+               while (1)
+               {
+                       if (output[middle]>elem)
+                       {
+                               end = middle ;
+                               middle=start+(end-start)/2 ;
+                       } ;
+                       if (output[middle]<elem)
+                       {
+                               start = middle ;
+                               middle=start+(end-start)/2 ;
+                       }
+                       if (output[middle]==elem)
+                               return middle ;
+                       if (end-start<=1)
+                       {
+                               if (output[start]==elem)
+                                       return start ;
+                               if (output[end]==elem)
+                                       return end ;
+                               return -1 ;
+                       }
+               }
+       }
+
+       /* finds an element in a sorted array via binary search
+        * that is smaller-equal elem und the next element is larger-equal  */
+       static inline INT fast_find_range(REAL* output, INT size, REAL elem)
+       {
+               INT start=0, end=size-2, middle=(size-2)/2 ;
+
+               if (output[start]>elem)
+                       return -1 ;
+               if (output[end]<=elem)
+                       return size-1 ;
+
+               while (1)
+               {
+                       if (output[middle]>elem)
+                       {
+                               end = middle ;
+                               middle=start+(end-start)/2 ;
+                       } ;
+                       if (output[middle]<elem)
+                       {
+                               start = middle ;
+                               middle=start+(end-start)/2 ;
+                       }
+                       if (output[middle]<=elem && output[middle+1]>=elem)
+                               return middle ;
+                       if (end-start<=1)
+                       {
+                               if (output[start]<=elem && output[start+1]>=elem)
+                                       return start ;
+                               return end ;
+                       }
+               }
+       }
+
+       /** calculates ROC into (fp,tp)
+        * from output and label of length size 
+        * returns index with smallest error=fp+fn
+        */
+       static INT calcroc(REAL* fp, REAL* tp, REAL* output, INT* label, INT& size, INT& possize, INT& negsize, REAL& tresh, FILE* rocfile);
+       //@}
+
+       /// returns the mutual information of p which is given in logspace
+       /// where p,q are given in logspace
+       static double mutual_info(REAL* p1, REAL* p2, INT len);
+
+       /// returns the relative entropy H(P||Q), 
+       /// where p,q are given in logspace
+       static double relative_entropy(REAL* p, REAL* q, INT len);
+
+       /// returns entropy of p which is given in logspace
+       static double entropy(REAL* p, INT len);
+
+       /**@name summing functions */
+       //@{ 
+       /** sum logarithmic probabilities.
+        * Probability measures are summed up but are now given in logspace where
+        * direct summation of exp(operand) is not possible due to numerical problems, i.e. eg. exp(-1000)=0. Therefore
+        * we do log( exp(a) + exp(b)) = a + log (1 + exp (b-a)) where a is max(p,q) and b min(p,q). */
+#ifdef USE_LOGCACHE
+       static inline REAL logarithmic_sum(REAL p, REAL q)
+       {
+               if (finite(p))
+               {
+                       if (finite(q))
+                       {
+
+                               register REAL diff=p-q;
+
+                               if (diff>0)             //p>q
+                               {
+                                       if (diff > LOGRANGE)
+                                               return p;
+                                       else
+                                               return  p + logtable[(int)(diff*LOGACCURACY)];
+                               }
+                               else                    //p<=q
+                               {
+                                       if (-diff > LOGRANGE)
+                                               return  q;
+                                       else
+                                               return  q + logtable[(int)(-diff*LOGACCURACY)];
+                               }
+                       }
+                       CIO::message("INVALID second operand to logsum(%f,%f) expect undefined results\n", p, q);
+                       return NAN;
+               }
+               else 
+                       return q;
+       }
+
+       ///init log table of form log(1+exp(x))
+       static void init_log_table();
+       
+       /// determine INT x for that log(1+exp(-x)) == 0
+       static INT determine_logrange();
+
+       /// determine accuracy, such that the thing fits into MAX_LOG_TABLE_SIZE, needs logrange as argument
+       static INT determine_logaccuracy(INT range);
+#else
+       /*
+       inline REAL logarithmic_sum(REAL p, REAL q)
+       {
+           double result=comp_logarithmic_sum(p,q);
+
+           printf("diff:%f <-> %f\n",p-q, result);
+           return result;
+       }*/
+
+       static inline REAL logarithmic_sum(REAL p, REAL q)
+       {
+               if (finite(p))
+               {
+                       if (finite(q))
+                       {
+
+                               register REAL diff=p-q;
+                               if (diff>0)             //p>q
+                               {
+                                       if (diff > LOGRANGE)
+                                               return p;
+                                       else
+                                               return  p + log(1+exp(-diff));
+                               }
+                               else                    //p<=q
+                               {
+                                       if (-diff > LOGRANGE)
+                                               return  q;
+                                       else
+                                               return  q + log(1+exp(diff));
+                               }
+                       }
+                       return p;
+               }
+               else 
+                       return q;
+       }
+#endif
+#ifdef LOG_SUM_ARRAY
+       /** sum up a whole array of values in logspace.
+        * This function addresses the numeric instabiliy caused by simply summing up N elements by adding 
+        * each of the elements to some variable. Instead array neighbours are summed up until one element remains.
+        * Whilst the number of additions remains the same, the error is only in the order of log(N) instead N.
+        */
+       static inline REAL logarithmic_sum_array(REAL *p, INT len)
+         {
+           if (len<=2)
+             {
+               if (len==2)
+                 return logarithmic_sum(p[0],p[1]) ;
+               if (len==1)
+                 return p[0];
+               return -INFTY ;
+             }
+           else
+             {
+               register REAL *pp=p ;
+               if (len%2==1) pp++ ;
+               for (register INT j=0; j < len>>1; j++)
+                 pp[j]=logarithmic_sum(pp[j<<1], pp[1+(j<<1)]) ;
+             }
+           return logarithmic_sum_array(p,len%2+len>>1) ;
+         } 
+#endif
+       //@}
+public:
+       /**@name constants*/
+       //@{
+       /// infinity
+       static const REAL INFTY;
+       
+       /// almost neg (log) infinity
+       static const REAL ALMOST_NEG_INFTY;
+
+       /// range for logtable: log(1+exp(x))  -LOGRANGE <= x <= 0
+       static INT LOGRANGE;
+       
+#ifdef USE_LOGCACHE    
+       /// number of steps per integer
+       static INT LOGACCURACY;
+       //@}
+protected:
+       ///table with log-values
+       static REAL* logtable;  
+#endif
+       static CHAR rand_state[256];
+};
+
+
+//implementations of template functions
+template <class T>
+void CMath::qsort(T* output, INT size)
+{
+       if (size==2)
+       {
+               if (output[0] > output [1])
+                       swap(output[0],output[1]);
+       }
+       else
+       {
+               REAL split=output[(size*rand())/(RAND_MAX+1)];
+               //REAL split=output[size/2];
+
+               INT left=0;
+               INT right=size-1;
+
+               while (left<=right)
+               {
+                       while (output[left] < split)
+                               left++;
+                       while (output[right] > split)
+                               right--;
+
+                       if (left<=right)
+                       {
+                               swap(output[left],output[right]);
+                               left++;
+                               right--;
+                       }
+               }
+
+               if (right+1> 1)
+                       qsort(output,right+1);
+
+               if (size-left> 1)
+                       qsort(&output[left],size-left);
+       }
+}
+
+template <class T1,class T2>
+void CMath::qsort(T1* output, T2* index, INT size)
+{
+       if (size==2)
+       {
+               if (output[0] > output [1]){
+                       swap(output[0],output[1]);
+                       swap(index[0],index[1]);
+               }
+               
+       }
+       else
+       {
+               REAL split=output[(size*rand())/(RAND_MAX+1)];
+               //REAL split=output[size/2];
+               
+               INT left=0;
+               INT right=size-1;
+               
+               while (left<=right)
+               {
+                       while (output[left] < split)
+                               left++;
+                       while (output[right] > split)
+                               right--;
+                       
+                       if (left<=right)
+                       {
+                               swap(output[left],output[right]);
+                               swap(index[left],index[right]);
+                               left++;
+                               right--;
+                       }
+               }
+               
+               if (right+1> 1)
+                       qsort(output,index,right+1);
+               
+               if (size-left> 1)
+                       qsort(&output[left],&index[left], size-left);
+       }
+}
+
+template <class T1,class T2>
+void CMath::qsort_backward(T1* output, T2* index, INT size)
+{
+       if (size==2)
+       {
+               if (output[0] < output [1]){
+                       swap(output[0],output[1]);
+                       swap(index[0],index[1]);
+               }
+               
+       }
+       else
+       {
+               REAL split=output[(size*rand())/(RAND_MAX+1)];
+               //REAL split=output[size/2];
+               
+               INT left=0;
+               INT right=size-1;
+               
+               while (left<=right)
+               {
+                       while (output[left] > split)
+                               left++;
+                       while (output[right] < split)
+                               right--;
+                       
+                       if (left<=right)
+                       {
+                               swap(output[left],output[right]);
+                               swap(index[left],index[right]);
+                               left++;
+                               right--;
+                       }
+               }
+               
+               if (right+1> 1)
+                       qsort(output,index,right+1);
+               
+               if (size-left> 1)
+                       qsort(&output[left],&index[left], size-left);
+       }
+}
+
+template <class T> 
+void CMath::nmin(REAL* output, T* index, INT size, INT n)
+{
+       if (6*n*size<13*size*log((double)size))
+               for (INT i=0; i<n; i++)
+                       min(&output[i], &index[i], size-i) ;
+       else
+               qsort(output, index, size) ;
+}
+
+template <class T>
+void CMath::min(REAL* output, T* index, INT size)
+{
+       if (size<=0)
+               return ;
+       REAL min_elem = output[0] ;
+       INT min_index = 0 ;
+       for (INT i=1; i<size; i++)
+               if (output[i]<min_elem)
+               {
+                       min_index=i ;
+                       min_elem=output[i] ;
+               }
+       swap(output[0], output[min_index]) ;
+       swap(index[0], index[min_index]) ;
+}
+#endif
diff --git a/DynProg/QPalmaDP.i b/DynProg/QPalmaDP.i
new file mode 100644 (file)
index 0000000..5eedd23
--- /dev/null
@@ -0,0 +1,62 @@
+%module QPalmaDP
+%{
+#define SWIG_FILE_WITH_INIT
+#include "common.h"
+#include "Mathmatics.h"
+#include "penalty_info.h"
+#include "qpalma_dp.h"
+%}
+
+%include "carrays.i"
+
+%include "common.h"
+%include "Mathmatics.h"
+%include "penalty_info.h"
+%include "qpalma_dp.h"
+
+/*
+%array_class(int, intArray) ;
+%array_class(double, doubleArray) ;
+%array_functions(double, doubleFArray) ;
+%array_functions(struct penalty_struct, penaltyArray) ;
+%array_class(Pre_score, Pre_scoreArray) ;
+
+%pythoncode
+%{
+
+def createPenaltyArrayFromList(list):
+   array = new_penaltyArray(len(list))
+   for i in range(len(list)):
+      penaltyArray_setitem(array, i, list[i])
+   return array
+
+def createDoubleArrayFromList(list):
+   #array = new_doubleArray(len(list))
+   array = doubleArray(len(list))
+   for i in range(len(list)):
+   #   doubleArray_setitem(array, i, list[i])
+      array[i] = list[i]
+   return array
+
+def createIntArrayFromList(list):
+   #array = new_intArray(len(list))
+   array = intArray(len(list))
+   for i in range(len(list)):
+      #intArray_setitem(array, i, list[i])
+      array[i] = list[i]
+   return array
+
+def createListFromIntArray(array, array_len):
+   list = [0]*array_len
+   for i in range(array_len):
+      list[i] = intArray_getitem(array,i)
+   return list
+
+def createListFromDoubleArray(array, array_len):
+   list = [0]*array_len
+   for i in range(array_len):
+      list[i] = doubleArray_getitem(array,i)
+   return list
+
+%} 
+*/
diff --git a/DynProg/common.h b/DynProg/common.h
new file mode 100644 (file)
index 0000000..61776e9
--- /dev/null
@@ -0,0 +1,160 @@
+#ifndef __COMMON_H__
+#define __COMMON_H__
+
+#include <stdlib.h> 
+#include <stdio.h> 
+#include "config.h"
+
+#ifdef SUNOS
+#define bool int
+#define false 0
+#define true 1
+#endif
+
+#ifndef LINUX
+#define RANDOM_MAX 2147483647
+#else
+#define RANDOM_MAX RAND_MAX
+#endif
+
+/**@name Standard Types 
+ * Definition of Platform independent Types
+*/
+//@{
+
+/// Type CHAR
+typedef char CHAR;
+typedef CHAR* P_CHAR;
+
+/// Type BYTE 
+typedef unsigned char BYTE;
+typedef BYTE* P_BYTE;
+
+/// Type SHORT is 2 bytes in size
+typedef short int SHORT;
+typedef SHORT* P_SHORT;
+
+/// Type WORD is 2 bytes in size
+typedef unsigned short int WORD;
+typedef WORD* P_WORD;
+
+/// Type INT is 4 bytes in size
+typedef int INT;
+typedef INT* P_INT;
+
+/// Type INT is 4 bytes in size
+typedef unsigned int UINT;
+typedef UINT* P_UINT;
+
+/// Type LONG is 8 bytes in size
+typedef long LONG;
+typedef LONG* P_LONG;
+
+/// Type SHORTREAL is 4 bytes in size
+typedef float SHORTREAL;
+typedef SHORTREAL* P_SHORTREAL;
+
+/// Type REAL is 8 bytes in size
+typedef double REAL;
+typedef REAL* P_REAL;
+
+/// Type LONGREAL is 16 bytes in size
+//typedef long double LONGREAL;
+//typedef LONGREAL* P_LONGREAL;
+
+#ifdef USE_SHORTREAL_KERNELCACHE
+       typedef SHORTREAL KERNELCACHE_ELEM;
+#else
+       typedef REAL KERNELCACHE_ELEM;
+#endif
+
+typedef KERNELCACHE_ELEM P_KERNELCACHE_ELEM;
+
+typedef LONG KERNELCACHE_IDX;
+
+/// The io libs output [DEBUG] etc in front of every CIO::message
+/// 'higher' messages filter output depending on the loglevel, i.e. CRITICAL messages
+/// will print all M_CRITICAL TO M_EMERGENCY messages to
+enum EMessageType
+{
+       M_DEBUG,
+       M_INFO,
+       M_NOTICE,
+       M_WARN,
+       M_ERROR,
+       M_CRITICAL,
+       M_ALERT,
+       M_EMERGENCY,
+       M_MESSAGEONLY,
+       M_PROGRESS
+};
+
+enum EKernelType
+{
+       K_UNKNOWN = 0,
+       K_OPTIMIZABLE = 4096,
+       K_LINEAR = 10 | K_OPTIMIZABLE,
+       K_POLY  = 20,
+       K_GAUSSIAN = 30,
+       K_HISTOGRAM = 40,
+       K_SALZBERG = 41,
+       K_LOCALITYIMPROVED = 50,
+       K_SIMPLELOCALITYIMPROVED = 60,
+       K_FIXEDDEGREE = 70,
+       K_WEIGHTEDDEGREE =    80 | K_OPTIMIZABLE,
+       K_WEIGHTEDDEGREEPOS = 81 | K_OPTIMIZABLE,
+       K_WEIGHTEDDEGREEPOLYA =    82,
+       K_WD = 83,
+       K_COMMWORD = 90 | K_OPTIMIZABLE ,
+       K_POLYMATCH = 100,
+       K_ALIGNMENT = 110,
+       K_COMMWORDSTRING = 120 | K_OPTIMIZABLE,
+       K_SPARSENORMSQUARED = 130,
+       K_COMBINED = 140 | K_OPTIMIZABLE
+};
+
+enum EFeatureType
+{
+       F_UNKNOWN = 0,
+       F_REAL = 10,
+       F_SHORT = 20,
+       F_CHAR = 30,
+       F_INT = 40,
+       F_BYTE = 50,
+       F_WORD = 60
+};
+
+enum EFeatureClass
+{
+       C_UNKNOWN = 0,
+       C_SIMPLE = 10,
+       C_SPARSE = 20,
+       C_STRING = 30,
+       C_COMBINED = 40
+};
+
+/// Alphabet of charfeatures/observations
+enum E_ALPHABET
+{
+       /// DNA - letters A,C,G,T,*,N,n
+       DNA=0,
+
+       /// PROTEIN - letters a-z
+       PROTEIN=1,
+
+       /// ALPHANUM - [0-9a-z]
+       ALPHANUM=2,
+
+       /// CUBE - [1-6]
+       CUBE=3,
+
+       /// NONE - type has no alphabet
+       NONE=4
+};
+
+//@}
+
+#define TMP_DIR "/tmp/"
+//#define TMP_DIR "/short/x46/tmp/"
+
+#endif
diff --git a/DynProg/config.h b/DynProg/config.h
new file mode 100644 (file)
index 0000000..88b448b
--- /dev/null
@@ -0,0 +1,19 @@
+
+#define HAVE_MATLAB 1
+
+#define HAVE_LARGEFILE 1
+
+
+
+
+
+
+#define USE_BIGSTATES 1
+
+
+#define USE_HMMCACHE 1
+
+
+
+
+
diff --git a/DynProg/debug_tools.cpp b/DynProg/debug_tools.cpp
new file mode 100644 (file)
index 0000000..18c7d26
--- /dev/null
@@ -0,0 +1,11 @@
+#include <cstdio>
+#include <cstdlib>
+#include "debug_tools.h"
+using namespace std;
+
+void fassert(bool exp,int line, char* file) {
+   if (! exp) {
+      printf("invalid fassert at line %d in file %s!\n",line,file);
+      exit(EXIT_FAILURE);
+   }
+}
diff --git a/DynProg/debug_tools.h b/DynProg/debug_tools.h
new file mode 100644 (file)
index 0000000..4865bee
--- /dev/null
@@ -0,0 +1,11 @@
+#ifndef __DEBUG_TOOLS__
+#define __DEBUG_TOOLS__
+
+#include <cstdio>
+
+void fassert(bool exp,int line, char* file);
+
+#endif // __DEBUG_TOOLS__
+
+#define FA(expr) (fassert(expr,__LINE__,__FILE__))
+
diff --git a/DynProg/fill_matrix.cpp b/DynProg/fill_matrix.cpp
new file mode 100644 (file)
index 0000000..2f87968
--- /dev/null
@@ -0,0 +1,501 @@
+/*********************************************************************************************/
+// Version of fill_matrix with fills several alignment matrices (best, second_best ...)
+// nr_paths det. number of these matrices
+//
+// est_len: Number of columns in alignment matrix: length(EST) +1 (for the gap) (variable: i)
+// dna_len: Number of lines in alignment matrix: length(DNA) +1 (for the gap) (variable: j)
+//
+// !!! IMPORTANT !!! The pictures and equations in the paper/poster etc. describe the algorithm
+// for an alignment matrix where DNA and EST changed places !!! IMPORTANT !!!
+/*********************************************************************************************/
+
+/*
+  matchmatrix: columnwise
+    - A C G T N (dna)
+  - x x x x x x
+  A x x x x x x        
+  C x x x x x x
+  G x x x x x x
+  T x x x x x x
+  N x x x x x x
+(est)
+*/
+
+/*
+  alignment matrix: columnwise
+   |-EST . . .
+  -+------------
+  -|00 ...
+  D|0.
+  N|. .
+  A|.  .
+  .|.
+  .|
+  .|
+*/
+
+
+/* TODO
+   - max_len = 10000 for elegans!
+   - do not use double
+*/
+
+#include "fill_matrix.h"
+#include "debug_tools.h"
+
+#define D_USE_QUALITY_SCORES 1
+
+int check_char(char base)
+{
+  switch(base)
+  {
+  case 'A':
+  case 'a': return 1;
+  case 'C':    
+  case 'c': return 2;
+  case 'G':
+  case 'g': return 3;
+  case 'T':
+  case 't': return 4;
+  case 'N':
+  case 'n': return 5; 
+  //default : return 0;
+  default : return -1;
+  }
+  
+}
+
+ /* system of Palma. Otherwise scoring_functions should point to an array of
+ * plifs scoring the respective pairs of characters together with the EST
+ * quality score.
+ */
+
+inline double getScore(struct penalty_struct* qualityScores, int mlen, int dnaChar, int estChar, double baseScore) {
+   double score = 0.0;
+   FA(estChar > 0 && estChar < 6);
+   FA(dnaChar > -1 && dnaChar < 6);
+
+   //printf("Starting scoring / scheme is USE_QUALITY_SCORES\n");
+   //printf("estChar,dnaChar are: %d,%d,%d",estChar,dnaChar);
+   
+   int currentPos = (estChar-1)*6+dnaChar;
+   struct penalty_struct currentPen = qualityScores[currentPos];
+
+   //if( estChar == 1 and dnaChar == 1)
+   //   printf("A-A : %f %f\n",currentPen.penalties[0],currentPen.penalties[1]);
+
+       INT idx = 0 ;
+       for (INT i=0; i<currentPen.len; i++)
+               if (currentPen.limits[i] <= baseScore)
+                       idx++ ;
+       
+       if (idx==0)
+               score=currentPen.penalties[0] ;
+       else if (idx==currentPen.len)
+               score=currentPen.penalties[currentPen.len-1] ;
+       else
+       {
+               score = (currentPen.penalties[idx]*(baseScore-currentPen.limits[idx-1]) + currentPen.penalties[idx-1]*
+                          (currentPen.limits[idx]-baseScore)) / (currentPen.limits[idx]-currentPen.limits[idx-1]) ;  
+   }
+
+   //score = lookup_penalty(&currentPen,idx, NULL, 0);
+   //if(baseScore <= currentPen.limits[0]) {
+   //   score = currentPen.penalties[0];
+   //} else {
+   //   int sp_idx;
+   //   for(sp_idx=0;sp_idx<currentPen.len;sp_idx++) {
+   //      if(currentPen.limits[sp_idx] <= baseScore )
+   //         score = currentPen.penalties[sp_idx];
+   //   //printf("Current score is %f\n",score);
+   //   }
+   //}
+
+   return score;
+}
+
+void fill_matrix(int nr_paths, Pre_score* matrices[], int est_len /*counter var: i*/, int dna_len /*counter var: j*/, char* est, char* dna, double* prb, penalty_struct* functions , double* matchmatrix, penalty_struct* qualityScores, double* donor, double* acceptor, bool remove_duplicate_scores, int nr_donor_sites, int* donor_sites, int* max_score_positions)
+{      
+  //printf("Entering fill_matrix...\n");
+
+  /*********************************************************************************************/
+  // variables
+  /*********************************************************************************************/
+  const int mlen = 6; // length of matchmatrix
+  int estChar, dnaChar; // 0:'-', 1:'a', 2:'c', 3:'g', 4:'t', 5:'n'
+
+  double baseScore;
+  double *est_scores = prb ;
+
+  double prevValue ;
+  double tempValue;
+  int max_len = (int)functions->limits[functions->len-1] ; //last (len) supporting point of h
+  //printf("max_len: %d\n", max_len) ;
+
+  int num_elem ; // counter variable
+
+  const int max_num_elem = (4+nr_paths+max_len) * nr_paths ; // upper bound on num_elem
+  double *scores=new double[max_num_elem] ; //are sorted later
+  struct ArrayElem *array=new ArrayElem[max_num_elem] ;
+  //printf("%d\n", max_num_elem*sizeof(ArrayElem)) ;
+  
+  int max_ss_element[nr_paths*nr_paths]; //position on dna with max ss score for introns of length greater than max_len
+  int splice_pos ; //splice position get from donor_sites
+  int start_splice_pos ; //where to start looking in donor_sites
+
+  double max_scores[nr_paths] ;
+
+  /*********************************************************************************************/
+  // Initialisation of all alignment matrices, columnwise
+  /*********************************************************************************************/
+  for (int z=0; z<nr_paths; z++) /* matrice z */
+  {
+    max_scores[z] = log(0.0) ;
+    max_score_positions[2*z]    = 0; // pos i
+    max_score_positions[2*z +1] = 0; // pos j
+
+    for (int i=0; i<est_len; i++) /* i.column */
+    {
+      for (int j=0; j<dna_len; j++) /* j.line */
+      {
+       ((Pre_score*)matrices[z] +(i*dna_len) +j)->value = log(0.0); // -inf
+       ((Pre_score*)matrices[z] +(i*dna_len) +j)->prev_i = 0;
+       ((Pre_score*)matrices[z] +(i*dna_len) +j)->prev_j = 0;
+       ((Pre_score*)matrices[z] +(i*dna_len) +j)->prev_matrix_no = 0;
+       ((Pre_score*)matrices[z] +(i*dna_len) +j)->issplice = 0;
+      }
+    }
+  }
+
+  /*********************************************************************************************/      
+  // Element (0,0) for first/best matrice (z=0)
+  /*********************************************************************************************/       
+  ((Pre_score*)matrices[0])->value = 0;
+  ((Pre_score*)matrices[0])->prev_i = 0;
+  ((Pre_score*)matrices[0])->prev_j = 0;
+  ((Pre_score*)matrices[0])->prev_matrix_no = 0;
+  ((Pre_score*)matrices[0])->issplice = 0;
+
+  /*********************************************************************************************/      
+  // Line Zero for first/best matrice (z=0)
+  /*********************************************************************************************/
+  for (int i=1; i<est_len; i++) /* 0.line, all columns (DNA='-') */
+  {    
+    ((Pre_score*)matrices[0] +(i*dna_len))->value = 0 ;                
+    ((Pre_score*)matrices[0] +(i*dna_len))->prev_i = i-1;
+    ((Pre_score*)matrices[0] +(i*dna_len))->prev_j = 0;    
+    ((Pre_score*)matrices[0] +(i*dna_len))->prev_matrix_no = 0;
+    ((Pre_score*)matrices[0] +(i*dna_len))->issplice = 0;
+  }
+
+
+  /*********************************************************************************************/      
+  // Column Zero for first/best matrice (z=0)
+  /*********************************************************************************************/      
+  for (int j=1; j<dna_len; j++) // 0.column, all lines
+  {      
+    ((Pre_score*)matrices[0] +j)->value =  0 ;
+    ((Pre_score*)matrices[0] +j)->prev_i = 0 ;
+    ((Pre_score*)matrices[0] +j)->prev_j = j-1 ;
+    ((Pre_score*)matrices[0] +j)->prev_matrix_no = 0 ;
+    ((Pre_score*)matrices[0] +j)->issplice = 0;
+  }
+
+   
+  /*********************************************************************************************/      
+  // !!!! Column Zero and line Zero for all other matrices except the best one (z>0): -INF !!!! 
+  /*********************************************************************************************/      
+
+
+   //printf("after initialization\n");
+  
+  /*********************************************************************************************/
+  // all other entries for all matrices, last column and row treated seperately
+  /*********************************************************************************************/
+  
+  /* columnwise, 0.column and 0.line already filled */
+  for (int i=1; i<est_len; i++) /* i.column */
+  {
+    
+    start_splice_pos = 0 ; //where to start looking in donor_sites
+    for (int zz=0; zz<nr_paths*nr_paths; zz++) {max_ss_element[zz]=0 ;} // initializing
+  
+    for (int j=1; j<dna_len; j++) //j.line
+    {
+      dnaChar = check_char(dna[j-1]) ; // -1 because of of the gaps in the 0.line/column of the al-matrix
+      estChar = check_char(est[i-1]) ;
+      baseScore = est_scores[i-1];
+
+      //printf("at iteration %d %d\n",i,j);
+
+      //if introns of length greater than max_len possible
+      if (j > max_len)
+      {
+       //storing highest ss_score (the position) and NOT -INF
+       int new_ss_pos = j-max_len ;
+       double donor_score = donor[new_ss_pos-1] ;
+       if (isfinite(donor_score)) //splice site!
+        {
+         if (max_ss_element[nr_paths-1] != 0)
+          { 
+           for (int zz=0; zz<nr_paths; zz++) //for each alignment matrix
+            {
+             int pos_with_minscore = nr_paths ; //assuming, the new score is the worst score
+             double new_intron_score = donor_score+((Pre_score*)matrices[zz]+(i*dna_len)+new_ss_pos-1)->value ; //could be -INF
+             if (!(isfinite(new_intron_score))) {printf("new_intron_score is -INF\n") ;}
+             for (int pos=0; pos<nr_paths; pos++) //find worst score of all nr_paths+1 scores
+              {
+               int act_splice_position = max_ss_element[zz*nr_paths+pos] ;
+               assert(isfinite(donor[act_splice_position-1])) ;
+               double act_intron_score = donor[act_splice_position-1]+((Pre_score*)matrices[zz]+(i*dna_len)+act_splice_position-1)->value ;
+               if (new_intron_score >= act_intron_score) 
+                {
+                 new_intron_score = act_intron_score ;
+                 pos_with_minscore = pos ;
+               } //else nothing happens, new intron score still the worst one
+             }
+             if (pos_with_minscore != nr_paths)
+              {
+               max_ss_element[zz*nr_paths+pos_with_minscore] = new_ss_pos ; //replacing
+             }
+           }
+         }
+         else
+          {
+           for (int pos=0; pos<nr_paths; pos++)
+            {
+             if (max_ss_element[pos]==0)  //no previous splice site at this pos
+              {
+               for (int zz=0; zz<nr_paths; zz++) {max_ss_element[zz*nr_paths+pos]=j-max_len ;}
+               break ;
+             }
+           } 
+         }
+       }
+       //start_pos in donor_sites so that only introns shorter then max_length are seen
+       if ((start_splice_pos < nr_donor_sites) && donor_sites[start_splice_pos] == j-max_len)
+        {
+         start_splice_pos++ ;
+       }
+      }
+      
+
+      /* Computing all possible scores */
+      num_elem = 0 ; /* number of different scores/possibilities so far */
+      for (int z=0; z<nr_paths; z++) /* matrice z */
+      {
+       Pre_score* actMatrix = (Pre_score*)matrices[z]; /* hm, why?  */
+
+       /***************************************/
+       // 0. Zero (local alignment version)
+        /***************************************/
+       assert(num_elem<max_num_elem) ;
+       array[num_elem].prev_i = 0; 
+       array[num_elem].prev_j = 0; 
+       array[num_elem].prev_matrix_no = z;
+       array[num_elem].isSplice = false; /* no splice site */
+       scores[num_elem] = 0 ;
+       num_elem++ ;
+
+
+        /***************************************/
+       // 1. gap on EST (j-1,i) -> (j,i) 
+        /***************************************/
+       prevValue = ((Pre_score*)actMatrix +(i*dna_len)+j-1)->value ; /* (j-1,i) -> (j,i) */
+       if (isfinite(prevValue))
+       {
+      //if( currentMode == USE_QUALITY_SCORES ) {
+#if D_USE_QUALITY_SCORES 
+         tempValue = prevValue + matchmatrix[dnaChar];
+#else
+      //} else {
+             tempValue = prevValue + matchmatrix[mlen*dnaChar];   /* score(gap,DNA) */
+      //}
+#endif
+
+     //printf("tempValue: %f\n",tempValue);
+
+         if (isfinite(tempValue))
+         {
+           assert(num_elem<max_num_elem) ;
+           array[num_elem].prev_i = i; 
+           array[num_elem].prev_j = j-1; /* predecessor */
+           array[num_elem].prev_matrix_no = z;
+           array[num_elem].isSplice = false; /* no splice site */
+           scores[num_elem] = -tempValue ;
+           num_elem++ ;
+         }
+       }
+      
+
+       /***************************************/
+       // 2. match/mismatch (j-1,i-1) -> (j,i)
+       /***************************************/
+       prevValue = ((Pre_score*)actMatrix +(i-1)*dna_len + j-1)->value ; /* (j-1,i-1) -> (j,i) */
+       if (isfinite(prevValue))
+       {
+      //if( currentMode == USE_QUALITY_SCORES ) {
+#if D_USE_QUALITY_SCORES 
+        tempValue = prevValue + getScore(qualityScores,mlen,dnaChar,estChar,baseScore);
+#else
+      //} else {
+        tempValue = prevValue +(matchmatrix[mlen* dnaChar +estChar]);
+#endif
+     //}
+
+     //printf("tempValue: %f\n",tempValue);
+
+         if (isfinite(tempValue))
+         {
+           assert(num_elem<max_num_elem) ;
+           array[num_elem].prev_i = i-1; /* predecessor */
+           array[num_elem].prev_j = j-1; /* predecessor */
+           array[num_elem].prev_matrix_no = z;
+           array[num_elem].isSplice = false; /* no splice site */
+           scores[num_elem] = -tempValue ;
+           num_elem++ ;
+         }
+       }
+       
+
+       /***************************************/
+       // 3. gap on DNA (j,i-1) -> (j,i)
+       /***************************************/
+       prevValue = ((Pre_score*)actMatrix +(i-1)*dna_len + j)->value ;
+       if (isfinite(prevValue))
+       {
+      //if( currentMode == USE_QUALITY_SCORES ) {
+#if D_USE_QUALITY_SCORES 
+        tempValue = prevValue + getScore(qualityScores,mlen,0,estChar,baseScore);
+#else
+//      } else {
+            tempValue = prevValue +(matchmatrix[estChar]); /* score(EST,gap) */
+//      }
+#endif
+
+     //printf("tempValue: %f\n",tempValue);
+
+         if (isfinite(tempValue))
+         {
+           assert(num_elem<max_num_elem) ;
+           array[num_elem].prev_i = i-1; /* predecessor */
+           array[num_elem].prev_j = j; 
+           array[num_elem].prev_matrix_no = z;
+           array[num_elem].isSplice = false;  /* no splice site */
+           scores[num_elem]=-tempValue ;
+           num_elem++ ;
+         }
+       }
+
+       /***************************************/
+       // 4. all possible splice sites/introns
+       /***************************************/
+       // acceptor[j-1]: g of ag (-1 because cpp starts counting at 0)
+       // donor[splice_pos-1]: g of gt/gc (-1 because cpp starts counting at 0)
+       // donor_sites: the g's of the gt/gc
+       
+       if (isfinite(acceptor[j-1]))
+       {
+         
+         //the nr_paths many introns of length greater than max_len
+         for (int pos=0; pos<nr_paths; pos++)
+          {
+           if (max_ss_element[z*nr_paths+pos]>0)
+            {
+             assert((j-max_ss_element[z*nr_paths+pos]+1) > max_len) ;
+             splice_pos = max_ss_element[z*nr_paths+pos] ; //the g of the gt 
+             prevValue = ((Pre_score*)actMatrix + (i*dna_len) +splice_pos-1)->value ; //one before the _gt
+             if (isfinite(prevValue) && (((Pre_score*)actMatrix + (i*dna_len) +splice_pos-1)->issplice!=1))
+              {
+               int dist = j -(splice_pos-1); //_gt...ag: dist from g to g
+               assert(dist>max_len) ;
+               tempValue = lookup_penalty(functions,dist, NULL, 0) +donor[splice_pos-1] +acceptor[j-1] +prevValue;
+
+       //printf("tempValue: %f\n",tempValue);
+
+               if (isfinite(tempValue))
+                {
+                 assert(num_elem<max_num_elem) ;
+                 array[num_elem].prev_i = i; 
+                 array[num_elem].prev_j = splice_pos-1; // predecessor
+                 array[num_elem].prev_matrix_no = z;
+                 array[num_elem].isSplice = true; // splice site
+                 scores[num_elem]=-tempValue ;
+                 num_elem++ ;
+               }
+               else if (isfinite(lookup_penalty(functions,dist, NULL, 0)))
+                {
+                 printf("Something wrong with donor_sites(1000er) (intron_length: %d)\n", dist) ;
+                 sleep(5000) ;
+               }
+             }
+           }
+         }
+
+          //all introns of length <= max_len
+          for (int k=start_splice_pos; k < nr_donor_sites; k++) 
+         //for (int k=0; k < nr_donor_sites; k++) //also intron of length greater than 1000
+         {   
+           splice_pos = donor_sites[k] ;
+           if (splice_pos > j-4) { break ;}
+           prevValue = ((Pre_score*)actMatrix + (i*dna_len) +splice_pos-1)->value ; //one before the _gt
+           
+           if (isfinite(prevValue) && (((Pre_score*)actMatrix + (i*dna_len) +splice_pos-1)->issplice!=1))
+           {
+             int dist = j -(splice_pos-1); //_gt...ag: dist from g to g
+             assert(dist<=max_len) ;
+             tempValue = lookup_penalty(functions,dist, NULL, 0) +donor[splice_pos-1] +acceptor[j-1] +prevValue;
+             if (isfinite(tempValue))
+             {
+               assert(num_elem<max_num_elem) ;
+               array[num_elem].prev_i = i; 
+               array[num_elem].prev_j = splice_pos-1; /* predecessor */
+               array[num_elem].prev_matrix_no = z;
+               array[num_elem].isSplice = true; /* splice site */
+               scores[num_elem]=-tempValue ;
+               num_elem++ ;
+             }
+             else if (isfinite(lookup_penalty(functions,dist, NULL, 0)))
+             {
+               printf("Something wrong with donor_sites (intron_length: %d)\n", dist) ;
+               sleep(5000) ;
+             }
+           }
+         }
+       }
+
+      } //end of z
+      
+      CMath::nmin<struct ArrayElem>(scores, array, num_elem, nr_paths);
+      
+      /* the nr_paths many highest scores are written in the nr_paths many matrices*/
+      for (int z=0; z<nr_paths; z++) {
+       if (z>=num_elem) {
+         //nothing happens, entry in matrices[z] at pos. (i,j) still -inf
+       }
+       else {
+         ((Pre_score*)matrices[z] + (i*dna_len) +j)->value = -scores[z] ;
+         ((Pre_score*)matrices[z] + (i*dna_len) +j)->prev_i = array[z].prev_i ;
+         ((Pre_score*)matrices[z] + (i*dna_len) +j)->prev_j = array[z].prev_j ;
+         ((Pre_score*)matrices[z] + (i*dna_len) +j)->issplice = array[z].isSplice ;
+         ((Pre_score*)matrices[z] + (i*dna_len) +j)->prev_matrix_no = array[z].prev_matrix_no;
+
+         if (-scores[z]>=max_scores[z]) { //storing the postion of the highest alignment score yet
+           max_scores[z] = -scores[z] ;
+           max_score_positions[2*z]    = i; // pos i
+           max_score_positions[2*z +1] = j; // pos j
+         }
+       }
+      } //end of z
+
+    } //end of j
+  } // end of i
+
+  //free memory
+  delete[] array;
+  delete[] scores;
+
+  //printf("Leaving fill_matrix...\n");
+  return;
+}
diff --git a/DynProg/fill_matrix.h b/DynProg/fill_matrix.h
new file mode 100644 (file)
index 0000000..9432c4e
--- /dev/null
@@ -0,0 +1,57 @@
+#ifndef _FILLMATRIX_H__
+#define _FILLMATRIX_H__
+
+#include "Mathmatics.h"
+#include "penalty_info.h"
+#include <algorithm> //Eigentlich in Header
+#include <assert.h>
+#include <ctype.h>
+#include <iostream>
+#include <math.h>
+#include <stdlib.h> 
+#include <string.h>
+
+
+struct ArrayElem { //24B
+  double score;
+  int prev_j;
+  int prev_i;
+  int prev_matrix_no;   
+  bool isSplice;
+} ;
+
+struct pre_score { //24B
+  double value; //8
+  int prev_i; //4
+  int prev_j; //4
+  int prev_matrix_no;//4
+  bool issplice;//4
+};
+
+typedef struct pre_score Pre_score;
+
+typedef struct Align_pair { //24B
+  double value ; 
+  bool issplice;
+  int previ,prevj ;
+  char est_char;
+  char dna_char;
+};
+
+
+void fill_matrix (int nr_paths, Pre_score* matrices[], int len_est, int len_dna, char* est, char* dna, double* prb, penalty_struct* functions, double* matchmatrix, penalty_struct* qualityScores, double* donor, double* acceptor, bool remove_duplicate_scores, int nr_donor_sites, int* donor_sites, int* max_score_positions);
+// funktion aendern in void result_align(Pre_score* matrices[], int anzpath,
+
+int check_char(char base);
+
+bool result_align (Pre_score* matrices[], int matrixnr, int i,  int j, int* result_length_ptr, char* est, char* dna, int* s_align, int* e_align, int* m_param, double* alignmentscores, int* max_score_positions );
+
+extern void print_align(Pre_score* matrix, int length_est,  int length_dna, Align_pair* vektor, int result_length, int print_matrix);
+
+double getScore(double *matchmatrix,int mlen, int dnaChar, int estChar);
+
+
+double scoreMatch(int dnaChar, int estChar, double baseScore);
+
+
+#endif // _FILLMATRIX_H__
diff --git a/DynProg/io.cpp b/DynProg/io.cpp
new file mode 100644 (file)
index 0000000..5ae3e1c
--- /dev/null
@@ -0,0 +1,96 @@
+#include "io.h"
+#include "common.h"
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <ctype.h>
+
+FILE* CIO::target=stdout;
+
+CIO::CIO()
+{
+}
+
+void CIO::message(EMessageType prio, const CHAR *fmt, ... )
+{
+       check_target();
+       print_message_prio(prio, target);
+    va_list list;
+    va_start(list,fmt);
+    vfprintf(target,fmt,list);
+    va_end(list);
+    fflush(target);
+}
+
+void CIO::buffered_message(EMessageType prio, const CHAR *fmt, ... )
+{
+       check_target();
+       print_message_prio(prio, target);
+    va_list list;
+    va_start(list,fmt);
+    vfprintf(target,fmt,list);
+    va_end(list);
+}
+
+CHAR* CIO::skip_spaces(CHAR* str)
+{
+       INT i=0;
+
+       if (str)
+       {
+               for (i=0; isspace(str[i]); i++);
+
+               return &str[i];
+       }
+       else 
+               return str;
+}
+
+void CIO::set_target(FILE* t)
+{
+       target=t;
+}
+
+void CIO::check_target()
+{
+       if (!target)
+               target=stdout;
+}
+
+void CIO::print_message_prio(EMessageType prio, FILE* target)
+{
+       switch (prio)
+       {
+               case M_DEBUG:
+                       fprintf(target, "[DEBUG] ");
+                       break;
+               case M_PROGRESS:
+                       fprintf(target, "[PROGRESS] ");
+                       break;
+               case M_INFO:
+                       //fprintf(target, "[INFO] ");
+                       break;
+               case M_NOTICE:
+                       fprintf(target, "[NOTICE] ");
+                       break;
+               case M_WARN:
+                       fprintf(target, "[WARN] ");
+                       break;
+               case M_ERROR:
+                       fprintf(target, "[ERROR] ");
+                       break;
+               case M_CRITICAL:
+                       fprintf(target, "[CRITICAL] ");
+                       break;
+               case M_ALERT:
+                       fprintf(target, "[ALERT] ");
+                       break;
+               case M_EMERGENCY:
+                       fprintf(target, "[EMERGENCY] ");
+                       break;
+               case M_MESSAGEONLY:
+                       break;
+               default:
+                       break;
+       }
+}
diff --git a/DynProg/io.h b/DynProg/io.h
new file mode 100644 (file)
index 0000000..c127f92
--- /dev/null
@@ -0,0 +1,33 @@
+#ifndef __CIO_H__
+#define __CIO_H__
+
+#include "common.h"
+
+#include <stdio.h>
+#include <stdarg.h>
+
+class CIO
+{
+public:
+       CIO();
+
+       static void set_target(FILE* target);
+       static void message(EMessageType prio, const CHAR *fmt, ... );
+
+       inline static void not_implemented() 
+       {
+               message(M_ERROR, "Sorry, not yet implemented\n");
+       }
+
+       static void buffered_message(EMessageType prio, const CHAR *fmt, ... );
+
+       static CHAR* skip_spaces(CHAR* str);
+
+protected:
+       static void check_target();
+       static void print_message_prio(EMessageType prio, FILE* target);
+
+protected:
+       static FILE* target;
+};
+#endif
diff --git a/DynProg/penalty_info.cpp b/DynProg/penalty_info.cpp
new file mode 100644 (file)
index 0000000..d5d739f
--- /dev/null
@@ -0,0 +1,368 @@
+#include <assert.h>
+#include "config.h"
+//#include "features/CharFeatures.h"
+//#include "features/StringFeatures.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include "io.h"
+
+
+#include "fill_matrix.h"
+#include "penalty_info.h"
+
+void init_penalty_struct(struct penalty_struct &PEN)
+{
+       PEN.limits=NULL ;
+       PEN.penalties=NULL ;
+       PEN.id=-1 ;
+       PEN.next_pen=NULL ;
+       PEN.transform = T_LINEAR ;
+       PEN.name = NULL ;
+       PEN.max_len=0 ;
+       PEN.min_len=0 ;
+       PEN.cache=NULL ;
+       PEN.use_svm=0 ;
+}
+
+void init_penalty_struct_cache(struct penalty_struct &PEN)
+{
+       if (PEN.cache || PEN.use_svm)
+               return ;
+               
+       REAL* cache=new REAL[PEN.max_len+1] ;
+       if (cache)
+       {
+               for (INT i=0; i<=PEN.max_len; i++)
+                       cache[i] = lookup_penalty(&PEN, i, 0, false) ;
+               PEN.cache = cache ;
+       }
+}
+
+void delete_penalty_struct_palma(struct penalty_struct &PEN)
+{
+       if (PEN.id!=-1)
+       {
+               delete[] PEN.limits ;
+               delete[] PEN.penalties ;
+               delete[] PEN.name ;
+               delete[] PEN.cache ;
+       }
+}
+
+void delete_penalty_struct_array(struct penalty_struct *PEN, INT len)
+{
+       for (int i=0; i<len; i++)
+               delete_penalty_struct_palma(PEN[i]) ;
+       delete[] PEN ;
+}
+
+void copy_penalty_struct(struct penalty_struct *old, struct penalty_struct *newp) {
+   newp->len       = old->len;
+   newp->limits    = old->limits;
+   newp->penalties = old->penalties;
+   newp->max_len   = old->max_len;
+   newp->min_len   = old->min_len;
+   newp->cache     = old->cache;
+   newp->transform = old->transform;
+   newp->id        = old->id;
+   newp->next_pen  = old->next_pen;
+   newp->name      = old->name;
+   newp->use_svm   = old->use_svm;
+}
+
+
+//struct penalty_struct * read_penalty_struct_from_cell(const mxArray * mx_penalty_info, int &P)
+//{
+//     P = mxGetN(mx_penalty_info) ;
+//     
+//     struct penalty_struct * PEN = new struct penalty_struct[P] ;
+//     for (INT i=0; i<P; i++)
+//              init_penalty_struct(PEN[i]) ;
+//     
+//     for (INT i=0; i<P; i++)
+//     {
+//       const mxArray* mx_elem = mxGetCell(mx_penalty_info, i) ;
+//       if (mx_elem==NULL || !mxIsStruct(mx_elem))
+//         {
+//           CIO::message(M_ERROR, "empty cell element\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         } ;
+//       const mxArray* mx_id_field = mxGetField(mx_elem, 0, "id") ;
+//       if (mx_id_field==NULL || !mxIsNumeric(mx_id_field) || 
+//           mxGetN(mx_id_field)!=1 || mxGetM(mx_id_field)!=1)
+//         {
+//           CIO::message(M_ERROR, "missing id field\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         }
+//       const mxArray* mx_limits_field = mxGetField(mx_elem, 0, "limits") ;
+//       if (mx_limits_field==NULL || !mxIsNumeric(mx_limits_field) ||
+//           mxGetM(mx_limits_field)!=1)
+//         {
+//           CIO::message(M_ERROR, "missing limits field\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         }
+//       INT len = mxGetN(mx_limits_field) ;
+//       
+//       const mxArray* mx_penalties_field = mxGetField(mx_elem, 0, "penalties") ;
+//       if (mx_penalties_field==NULL || !mxIsNumeric(mx_penalties_field) ||
+//           mxGetM(mx_penalties_field)!=1 || mxGetN(mx_penalties_field)!=len)
+//         {
+//           CIO::message(M_ERROR, "missing penalties field\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         }
+//       const mxArray* mx_transform_field = mxGetField(mx_elem, 0, "transform") ;
+//       if (mx_transform_field==NULL || !mxIsChar(mx_transform_field))
+//         {
+//           CIO::message(M_ERROR, "missing transform field\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         }
+//       const mxArray* mx_name_field = mxGetField(mx_elem, 0, "name") ;
+//       if (mx_name_field==NULL || !mxIsChar(mx_name_field))
+//         {
+//           CIO::message(M_ERROR, "missing name field\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         }
+//       const mxArray* mx_max_len_field = mxGetField(mx_elem, 0, "max_len") ;
+//       if (mx_max_len_field==NULL || !mxIsNumeric(mx_max_len_field) ||
+//           mxGetM(mx_max_len_field)!=1 || mxGetN(mx_max_len_field)!=1)
+//         {
+//           CIO::message(M_ERROR, "missing max_len field\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         }
+//       const mxArray* mx_min_len_field = mxGetField(mx_elem, 0, "min_len") ;
+//       if (mx_min_len_field==NULL || !mxIsNumeric(mx_min_len_field) ||
+//           mxGetM(mx_min_len_field)!=1 || mxGetN(mx_min_len_field)!=1)
+//         {
+//           CIO::message(M_ERROR, "missing min_len field\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         }
+//       const mxArray* mx_use_svm_field = mxGetField(mx_elem, 0, "use_svm") ;
+//       if (mx_use_svm_field==NULL || !mxIsNumeric(mx_use_svm_field) ||
+//           mxGetM(mx_use_svm_field)!=1 || mxGetN(mx_use_svm_field)!=1)
+//         {
+//           CIO::message(M_ERROR, "missing use_svm field\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         }
+//       INT use_svm = (INT) mxGetScalar(mx_use_svm_field) ;
+//             //fprintf(stderr, "use_svm_field=%i\n", use_svm) ;
+//       
+//       const mxArray* mx_next_id_field = mxGetField(mx_elem, 0, "next_id") ;
+//       if (mx_next_id_field==NULL || !mxIsNumeric(mx_next_id_field) ||
+//           mxGetM(mx_next_id_field)!=1 || mxGetN(mx_next_id_field)!=1)
+//         {
+//           CIO::message(M_ERROR, "missing next_id field\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         }
+//       INT next_id = (INT) mxGetScalar(mx_next_id_field)-1 ;
+//       
+//       INT id = (INT) mxGetScalar(mx_id_field)-1 ;
+//       if (i<0 || i>P-1)
+//         {
+//           CIO::message(M_ERROR, "id out of range\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         }
+//       INT max_len = (INT) mxGetScalar(mx_max_len_field) ;
+//       if (max_len<0 || max_len>1024*1024*100)
+//         {
+//           CIO::message(M_ERROR, "max_len out of range\n") ;
+//                     delete_penalty_struct_array(PEN,P) ;
+//                     return NULL ;
+//         }
+//       PEN[id].max_len = max_len ;
+//
+//       INT min_len = (INT) mxGetScalar(mx_min_len_field) ;
+//       if (min_len<0 || min_len>1024*1024*100)
+//         {
+//           CIO::message(M_ERROR, "min_len out of range\n") ;
+//                     delete_penalty_struct_array(PEN,P) ;
+//                     return NULL ;
+//         }
+//       PEN[id].min_len = min_len ;
+//       
+//       if (PEN[id].id!=-1)
+//         {
+//           CIO::message(M_ERROR, "penalty id already used\n") ;
+//           delete_penalty_struct_array(PEN,P) ;
+//           return NULL ;
+//         }
+//       PEN[id].id=id ;
+//       if (next_id>=0)
+//         PEN[id].next_pen=&PEN[next_id] ;
+//       //fprintf(stderr,"id=%i, next_id=%i\n", id, next_id) ;
+//       
+//       assert(next_id!=id) ;
+//       PEN[id].use_svm=use_svm ;
+//       PEN[id].limits = new REAL[len] ;
+//       PEN[id].penalties = new REAL[len] ;
+//       double * limits = mxGetPr(mx_limits_field) ;
+//       double * penalties = mxGetPr(mx_penalties_field) ;
+//       
+//       for (INT i=0; i<len; i++)
+//         {
+//           PEN[id].limits[i]=limits[i] ;
+//           PEN[id].penalties[i]=penalties[i] ;
+//         }
+//       PEN[id].len = len ;
+//       
+//       char *transform_str = mxArrayToString(mx_transform_field) ;                           
+//       char *name_str = mxArrayToString(mx_name_field) ;                             
+//       
+//       if (strcmp(transform_str, "log")==0)
+//         PEN[id].transform = T_LOG ;
+//       else if (strcmp(transform_str, "log(+1)")==0)
+//         PEN[id].transform = T_LOG_PLUS1 ;   
+//       else if (strcmp(transform_str, "log(+3)")==0)
+//         PEN[id].transform = T_LOG_PLUS3 ;   
+//       else if (strcmp(transform_str, "(+3)")==0)
+//         PEN[id].transform = T_LINEAR_PLUS3 ;        
+//       else if (strcmp(transform_str, "")==0)
+//         PEN[id].transform = T_LINEAR ;      
+//       else
+//         {
+//           delete_penalty_struct_array(PEN,P) ;
+//           mxFree(transform_str) ;
+//           return NULL ;
+//         }
+//       PEN[id].name = new char[strlen(name_str)+1] ;
+//       strcpy(PEN[id].name, name_str) ;
+//       
+//       init_penalty_struct_cache(PEN[id]) ;
+//         
+//       mxFree(transform_str) ;
+//       mxFree(name_str) ;
+//     }
+//     return PEN ;
+//}
+
+REAL lookup_penalty_svm(const struct penalty_struct *PEN, INT p_value, REAL *d_values)
+{      
+       if (PEN==NULL)
+               return 0 ;
+       assert(PEN->use_svm>0) ;
+       REAL d_value=d_values[PEN->use_svm-1] ;
+       //fprintf(stderr,"transform=%i, d_value=%1.2f\n", (INT)PEN->transform, d_value) ;
+       
+       switch (PEN->transform)
+       {
+       case T_LINEAR:
+               break ;
+       case T_LOG:
+               d_value = log(d_value) ;
+               break ;
+       case T_LOG_PLUS1:
+               d_value = log(d_value+1) ;
+               break ;
+       case T_LOG_PLUS3:
+               d_value = log(d_value+3) ;
+               break ;
+       case T_LINEAR_PLUS3:
+               d_value = d_value+3 ;
+               break ;
+       default:
+               CIO::message(M_ERROR, "unknown transform\n") ;
+               break ;
+       }
+       
+       INT idx = 0 ;
+       REAL ret ;
+       for (INT i=0; i<PEN->len; i++)
+               if (PEN->limits[i]<=d_value)
+                       idx++ ;
+       
+       if (idx==0)
+               ret=PEN->penalties[0] ;
+       else if (idx==PEN->len)
+               ret=PEN->penalties[PEN->len-1] ;
+       else
+       {
+               ret = (PEN->penalties[idx]*(d_value-PEN->limits[idx-1]) + PEN->penalties[idx-1]*
+                          (PEN->limits[idx]-d_value)) / (PEN->limits[idx]-PEN->limits[idx-1]) ;  
+       }
+       
+       //fprintf(stderr,"ret=%1.2f\n", ret) ;
+
+       if (PEN->next_pen)
+               ret+=lookup_penalty(PEN->next_pen, p_value, d_values);
+       
+       //fprintf(stderr,"ret=%1.2f\n", ret) ;
+       
+       return ret ;
+}
+
+REAL lookup_penalty(const struct penalty_struct *PEN, INT p_value, 
+                                       REAL* svm_values, bool follow_next)
+{      
+       if (PEN==NULL)
+               return 0 ;
+       if (PEN->use_svm)
+               return lookup_penalty_svm(PEN, p_value, svm_values) ;
+               
+       if ((p_value<PEN->min_len) || (p_value>PEN->max_len))
+               return -CMath::INFTY ;
+       
+       if (PEN->cache!=NULL && (p_value>=0) && (p_value<=PEN->max_len))
+       {
+               REAL ret=PEN->cache[p_value] ;
+               if (PEN->next_pen && follow_next)
+                       ret+=lookup_penalty(PEN->next_pen, p_value, svm_values);
+               return ret ;
+       }
+       
+       REAL d_value = (REAL) p_value ;
+       switch (PEN->transform)
+       {
+       case T_LINEAR:
+               break ;
+       case T_LOG:
+               d_value = log(d_value) ;
+               break ;
+       case T_LOG_PLUS1:
+               d_value = log(d_value+1) ;
+               break ;
+       case T_LOG_PLUS3:
+               d_value = log(d_value+3) ;
+               break ;
+       case T_LINEAR_PLUS3:
+               d_value = d_value+3 ;
+               break ;
+       default:
+               CIO::message(M_ERROR, "unknown transform\n") ;
+               break ;
+       }
+
+       INT idx = 0 ;
+       REAL ret ;
+       for (INT i=0; i<PEN->len; i++)
+               if (PEN->limits[i]<=d_value)
+                       idx++ ;
+       
+       if (idx==0)
+               ret=PEN->penalties[0] ;
+       else if (idx==PEN->len)
+               ret=PEN->penalties[PEN->len-1] ;
+       else
+       {
+               ret = (PEN->penalties[idx]*(d_value-PEN->limits[idx-1]) + PEN->penalties[idx-1]*
+                          (PEN->limits[idx]-d_value)) / (PEN->limits[idx]-PEN->limits[idx-1]) ;  
+       }
+       //if (p_value>=30 && p_value<150)
+       //fprintf(stderr, "%s %i(%i) -> %1.2f\n", PEN->name, p_value, idx, ret) ;
+       
+       if (PEN->next_pen && follow_next)
+               ret+=lookup_penalty(PEN->next_pen, p_value, svm_values);
+
+       return ret ;
+}
diff --git a/DynProg/penalty_info.h b/DynProg/penalty_info.h
new file mode 100644 (file)
index 0000000..30de4aa
--- /dev/null
@@ -0,0 +1,45 @@
+#ifndef __PENALTY_INFO_H__
+#define __PENALTY_INFO_H__
+
+#include "common.h"
+#include "Mathmatics.h"
+
+enum ETransformType
+{
+       T_LINEAR,
+       T_LOG,
+       T_LOG_PLUS1,
+       T_LOG_PLUS3,
+       T_LINEAR_PLUS3
+}  ;
+
+struct penalty_struct
+{
+       INT len ;
+       REAL *limits ;
+       REAL *penalties ;
+       INT max_len ;
+       INT min_len ;
+       REAL *cache ;
+       enum ETransformType transform ;
+       INT id ;
+       struct penalty_struct *next_pen ;
+       char * name ;
+       INT use_svm ;
+} ;
+
+void init_penalty_struct(struct penalty_struct &PEN) ;
+void delete_penalty_struct_palma(struct penalty_struct &PEN) ;
+//void delete_penalty_struct_array(struct penalty_struct *PEN, INT len) ;
+void copy_penalty_struct(struct penalty_struct *old, struct penalty_struct *newp);
+
+//#ifdef HAVE_MATLAB
+//struct penalty_struct * read_penalty_struct_from_cell(const mxArray * mx_penalty_info, mwSize &P) ;
+//#endif
+
+REAL lookup_penalty(const struct penalty_struct *PEN, INT p_value, 
+                                       REAL* svm_values, bool follow_next=true) ;
+
+enum mode { NORMAL, USE_QUALITY_SCORES};
+
+#endif
diff --git a/DynProg/print_align.cpp b/DynProg/print_align.cpp
new file mode 100644 (file)
index 0000000..312cabc
--- /dev/null
@@ -0,0 +1,97 @@
+#include "fill_matrix.h"
+using namespace std;
+
+void print_align(Pre_score* matrix, int length_est,  int length_dna,Align_pair* vektor, int result_length, int print_matrix)
+  
+{
+  //Druck der matrix
+  switch(print_matrix)
+    {
+    case 1:
+      {
+       cout <<"Matrix: \n";
+       
+       for (int k = 0; k<length_est; k++)//k Zeile, l Spalte
+         {
+           cout << "Zeile " << k << ":\t"; 
+           for (int l= 0; l<length_dna; l++)
+             {
+               cout << ((Pre_score*)matrix + (k*length_dna) + l)->value <<"\t";  
+             } 
+           cout <<"\n";
+         }
+       break;  
+      }
+      
+    case 2:
+      {
+       cout <<"Matrix: \n";
+       
+       for (int k = 0; k<length_est; k++)//k Zeile, l Spalte
+         {
+           cout << "Zeile " << k << ":\t"; 
+           for (int l= 0; l<length_dna; l++)
+             {
+               if (((((Pre_score*)matrix + (k*length_dna) + l)->prev_i) == k-1) && ((((Pre_score*)matrix + (k*length_dna) + l)->prev_j) == l-1))
+                 cout << "\\" <<((Pre_score*)matrix + (k*length_dna) + l)->value <<"\t";  
+               else if (((((Pre_score*)matrix + (k*length_dna) + l)->prev_i) == k) && ((((Pre_score*)matrix + (k*length_dna) + l)->prev_j) == l-1))
+                 cout << "<"<< ((Pre_score*)matrix + (k*length_dna) + l)->value <<"\t";  
+               else if (((((Pre_score*)matrix + (k*length_dna) + l)->prev_i) == k -1) && ((((Pre_score*)matrix + (k*length_dna) + l)->prev_j) == l))
+                 cout << "|"<< ((Pre_score*)matrix + (k*length_dna) + l)->value <<"\t";  
+               else             { cout << ((Pre_score*)matrix + (k*length_dna) + l)->value << "("; 
+               cout << ((Pre_score*)matrix + (k*length_dna) + l)->prev_i << "," << ((Pre_score*)matrix + (k*length_dna) + l)->prev_j << ")\t";  
+               }
+               
+             } 
+           cout <<"\n";
+         }
+        
+       cout << "length_est" << length_est << "\n";
+        cout << "length_dna" << length_dna << "\n";
+        cout << "result_length: " << result_length << "\n";
+       
+       
+       break;  
+      } 
+      
+    case 3:
+      {
+       //Druck des Alignment
+       
+       // cout << "Alignment\n" ;
+       int j = 0;
+       // cout << "resultlength: " << result_length <<"\n";
+       while (j<result_length) {
+         int pacman = j;       
+         cout << "D:(" <<j<<")\t";
+         for (int i=0; ((j<result_length) && (i<50)); i++)
+           {
+             cout << vektor[j].dna_char << " ";        
+             j++;      
+           }
+         
+         cout << "\n"; 
+         
+         cout << "E:(" <<pacman<<")\t";
+         for (int i=0;((pacman<result_length) && (i<50)); i++)
+           {
+             cout << vektor[pacman].est_char << " "; 
+             pacman++;
+           }
+         cout << "\n\n";       
+       }               
+       
+       //Ende Druck Alignment
+       break ;
+      }
+      
+    default:
+      {
+       cout <<"";
+      }
+    }
+  
+  
+  
+}
+
diff --git a/DynProg/qpalma_dp.cpp b/DynProg/qpalma_dp.cpp
new file mode 100644 (file)
index 0000000..f55c1e2
--- /dev/null
@@ -0,0 +1,376 @@
+
+#include "qpalma_dp.h"
+#include <cstring>
+
+using namespace std;
+
+/*[splice_align, est_align, weightMatch, alignmentscores, dnaest] = ...
+  myalign([nr_paths], dna, est, {h}, matchmatrix, donor, acceptor, remove_duplicate_scores, ...
+  print_matrix) */
+
+Alignment::Alignment(int numQPlifs, int numq, bool use_qscores) {
+      len = 0;
+      limits = 0;
+      penalties = 0;
+      max_len = 0;
+      min_len = 0;
+      cache = 0;
+      //enum ETransformType transform = T_LINEAR;
+      id = 0;
+      name = 0;
+      use_svm = 0;
+
+      // set ptrs to zero first
+      splice_align         = 0;
+      est_align            = 0;
+      mmatrix_param        = 0;
+      alignmentscores      = 0;
+      qualityFeaturesAllPaths = 0;
+      mlen = 6; // score matrix: length of 6 for "- A C G T N"
+
+      numQualSuppPoints = numq;
+      numPlifs = numQPlifs;
+      use_quality_scores = use_qscores;
+
+      //printf("number of support points: %d\n",numQualSuppPoints);
+      //printf("number of plifs: %d\n",numPlifs );
+      FA( numQualSuppPoints >= 0 );
+      FA( numPlifs >= 0 );
+}
+
+void Alignment::getDNAEST(){}
+
+void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
+      int est_len_p, double* prb, double* chastity, struct penalty_struct h, double* matchmatrix, int mm_len,
+      double* donor, int d_len, double* acceptor, int a_len, struct penalty_struct* qualityScores, 
+      bool remove_duplicate_scores, bool print_matrix) {
+
+   // printf("Entering myalign...\n");
+   dna_len =  dna_len_p + 1 ;
+   est_len =  est_len_p + 1 ;
+   nr_paths = nr_paths_p;
+
+  /***************************************************************************/ 
+  // initialize alignment matrices and call fill_matrix()  
+  /***************************************************************************/
+
+  mode currentMode;
+  if (use_quality_scores) 
+   currentMode = USE_QUALITY_SCORES;
+  else
+   currentMode = NORMAL;
+
+    // dnaest
+   DNA_ARRAY = 0;
+   EST_ARRAY = 0;
+
+  //printf("[qpalma_dp] read is: %s\n",est);
+  
+  //possible donor positions
+  int nr_donor_sites = 0 ;
+  for (int ii=0; ii<d_len; ii++) {
+    if(isfinite(donor[ii])) {
+      nr_donor_sites++ ;
+    }
+  }
+
+  int* donor_sites = new int[nr_donor_sites];
+  int donor_idx = 0 ;
+  for (int ii=0; ii<d_len; ii++) {
+    if(isfinite(donor[ii])) {
+      donor_sites[donor_idx] = ii+1 ;
+      donor_idx++ ;
+    }  
+  }
+
+  int* max_score_positions = new int[nr_paths*2];
+
+  Pre_score** matrices = new Pre_score*[nr_paths];
+  //for (int z=0; z<nr_paths; z++) {
+  for (size_t  z=0; z<nr_paths; z++) {
+      matrices[z] = new Pre_score[dna_len * est_len];
+  }
+
+   //printf("qualityScores\n");
+   //for(int qidx=0;qidx<numPlifs;qidx++) {
+   //   penalty_struct p; 
+   //   p = qualityScores[qidx];
+   //   printf("%d ",qidx);
+   //   for(int pidx=0;pidx<p.len;pidx++)
+   //      printf("%f ",pidx,p.penalties[pidx]);
+   //   printf("\n");
+   //}
+
+   //int h_idx;
+   //for(h_idx=0;h_idx<h.len;h_idx++)
+   //   printf("%d %f %f\n",h_idx,h.limits[h_idx],h.penalties[h_idx]);
+  
+  //printf("calling fill_matrix...\n");
+  fill_matrix(nr_paths, matrices, est_len, dna_len, est, dna, prb, &h, matchmatrix, qualityScores, donor, acceptor, remove_duplicate_scores, nr_donor_sites, donor_sites, max_score_positions);
+  //printf("after call to fill_matrix...\n");
+
+  /***************************************************************************/ 
+  // return arguments etc.
+  /***************************************************************************/
+  int result_length; //Eine Variable fuer alle Matrizen
+
+   splice_align_size = (dna_len-1)*nr_paths;
+   est_align_size = (est_len-1)*nr_paths;
+
+   int mmatrix_size;
+
+   if (currentMode == USE_QUALITY_SCORES) {
+      mmatrix_param_size = mlen*nr_paths;
+      mmatrix_size = mlen;
+   }
+
+   if (currentMode == NORMAL) {
+      mmatrix_param_size = (mlen*mlen)*nr_paths;
+      mmatrix_size = mlen*mlen;
+   }
+   
+   alignmentscores_size = nr_paths; //alignment score for each path/matrix
+   numPathsPlifs = numPlifs*nr_paths; //alignment score for each path/matrix
+
+   splice_align         = new int[splice_align_size];
+   est_align            = new int[est_align_size];
+   mmatrix_param        = new int[mmatrix_param_size];
+   alignmentscores      = new double[nr_paths]; //alignment score for each path/matrix
+
+
+  // printf("before memset...\n");
+  memset((char*)splice_align, -1, (dna_len-1)*nr_paths*sizeof(int)); // fills splice_align with zeros
+  memset((char*)est_align, -1, (est_len-1)*nr_paths*sizeof(int)); // fills est_align with zeros
+  memset((char*)mmatrix_param, 0, mmatrix_size*nr_paths*sizeof(int)); //fills mmatrix_param with zeros
+  memset(alignmentscores, -1, nr_paths*sizeof(double)); //fills alignmentscores with zeros
+  //printf("after memset...\n");
+  
+  qualityFeaturesAllPaths= new penalty_struct*[nr_paths];
+
+  //for (int z=0; z<nr_paths; z++) {
+  for (size_t z=0; z<nr_paths; z++) {
+    result_length = 0 ;
+    
+    int* s_align = splice_align + (dna_len-1)*z;  //pointer
+    int* e_align = est_align + (est_len-1)*z ;    //pointer
+    int* mparam  = mmatrix_param + mmatrix_size*z; //pointer
+
+   qualityFeaturesAllPaths[z] = new penalty_struct[numPlifs];
+
+   int qidx, pidx;
+   for(qidx=0;qidx<numPlifs;qidx++) {
+      penalty_struct p;
+      init_penalty_struct(p);
+      p.len = numQualSuppPoints;
+      p.limits = (double*) calloc(p.len,sizeof(double));
+
+      for(pidx=0;pidx<p.len;pidx++)
+         p.limits[pidx] = qualityScores[qidx].limits[pidx];
+
+      p.penalties = (double*) calloc(p.len,sizeof(double));
+      qualityFeaturesAllPaths[z][qidx] = p;
+   }
+
+   //penalty_struct* qparam = qualityScoresAllPaths + (numPlifs*z);
+
+    //printf("before call to result_align...\n");
+    bool no_more_path = result_align(matrices, z, est_len, dna_len, &result_length, est, dna, prb, chastity, s_align, e_align, mparam, alignmentscores, max_score_positions, qualityFeaturesAllPaths[z] , currentMode);
+    //printf("after call to result_align...\n");
+
+   //printf("z is %d\n",z);
+   //int len;
+   //for(qidx=0;qidx<numPlifs;qidx++) {
+   //   penalty_struct p;
+   //   p = qualityScoresAllPaths[z][qidx];
+   //   printf("%d: ",qidx);
+   //   for(pidx=0;pidx<p.len;pidx++)
+   //      printf("%f ",p.limits[pidx]);
+   //   printf("\n");
+
+   //   for(pidx=0;pidx<p.len;pidx++)
+   //      printf("%f ",p.penalties[pidx]);
+   //   printf("\n");
+   //}
+
+   if(z==0) {
+
+   if(DNA_ARRAY != 0) {
+       delete[] DNA_ARRAY;
+       delete[] EST_ARRAY;
+    }
+
+   result_len = result_length;
+   
+   DNA_ARRAY = new int[result_length];
+   EST_ARRAY = new int[result_length];
+
+    //backtracking
+    int i = max_score_positions[2*z] ; //i (est)
+    int j = max_score_positions[2*z +1] ; //j (dna)
+    int prev_i = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_i; 
+    int prev_j = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_j;
+    int prev_m_no = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_matrix_no;
+    
+    for (int ii=result_length; ii>0; ii--) {
+      if ((prev_i == (i-1)) && (prev_j == (j-1))) { // match or mismatch
+       DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
+       EST_ARRAY[ii-1] = check_char(est[i-1]) ;
+      }
+      else if ((prev_i == (i)) && (prev_j == (j-1))) {// gap on EST_ARRAY
+       DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
+       EST_ARRAY[ii-1] = 0 ; //gap
+      }
+      else if ((prev_i == (i-1)) && (prev_j == (j))) { // gap on DNA_ARRAY
+       DNA_ARRAY[ii-1] = 0 ; //gap
+       EST_ARRAY[ii-1] = check_char(est[i-1]) ;
+      }
+      else {// splice site
+       for (j; j > prev_j; j--) {
+         DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
+         EST_ARRAY[ii-1] = 6 ; //intron
+         ii-- ;
+       }
+       ii++ ; // last ii-- too much (because done in for-loop) 
+      }
+      
+      i = prev_i;
+      j = prev_j;    
+      prev_i = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_i; 
+      prev_j = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_j; 
+      prev_m_no = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_matrix_no;
+    }
+
+    } // end of "if z == 0"
+    
+   } //end of z
+
+   for (int i=nr_paths-1;i>=0; i--)
+      delete[] matrices[i];
+}
+
+void Alignment::getAlignmentResults(int* s_align, int* e_align,
+      int* mmatrix_p, double* alignscores, double* qScores) {
+
+   size_t idx;
+   for(idx=0; idx<splice_align_size; idx++)
+      s_align[idx] = splice_align[idx];
+
+   for(idx=0; idx<est_align_size; idx++)
+      e_align[idx] =  est_align[idx];
+
+   for(idx=0; idx<mmatrix_param_size; idx++)
+      mmatrix_p[idx] = mmatrix_param[idx];
+
+   for(idx=0; idx<alignmentscores_size; idx++)
+      alignscores[idx] = alignmentscores[idx];
+   
+   if (use_quality_scores) {
+      penalty_struct currentPlif;
+      int ctr=0;
+      //for (int z=0; z<nr_paths; z++) {
+      for (size_t z=0; z<nr_paths; z++) {
+         for(int estChar=1;estChar<6;estChar++) {
+            for(int dnaChar=0;dnaChar<6;dnaChar++) {
+
+               int currentPos = (estChar-1)*6+dnaChar;
+               currentPlif = qualityFeaturesAllPaths[z][currentPos];
+
+               for(int pidx=0; pidx<currentPlif.len; pidx++) {
+                  qScores[ctr] = currentPlif.penalties[pidx];
+                  //printf("%f ",qScores[ctr]);
+                  ctr++;
+               }
+               //printf("\n");
+      }}}
+
+      //printf("\nctr is %d\n",ctr);
+   }
+   //printf("Leaving getAlignmentResults...\n");
+}
+
+void Alignment::getAlignmentArrays(int* dna_align, int* est_align) {
+
+   size_t idx;
+   for(idx=0; idx<result_len; idx++) {
+      dna_align[idx] = DNA_ARRAY[idx];
+      est_align[idx] = EST_ARRAY[idx];
+   }
+}
+
+
+PyObject* Alignment::getAlignmentResultsNew() {
+
+   PyObject* py_splice_align  = PyList_New(splice_align_size);
+   PyObject* py_est_align     = PyList_New(est_align_size);
+   PyObject* py_mmatrix       = PyList_New(mmatrix_param_size);
+   PyObject* py_align_scores  = PyList_New(alignmentscores_size);
+   PyObject* py_q_scores      = PyList_New(0);
+
+   size_t idx;
+   for(idx=0; idx<splice_align_size; idx++)
+      PyList_SetItem(py_splice_align,idx,PyInt_FromLong(splice_align[idx]));
+   //s_align[idx] = splice_align[idx];
+
+   for(idx=0; idx<est_align_size; idx++)
+      PyList_SetItem(py_est_align,idx,PyInt_FromLong(est_align[idx]));
+
+   //e_align[idx] =  est_align[idx];
+
+   for(idx=0; idx<mmatrix_param_size; idx++)
+      PyList_SetItem(py_mmatrix,idx,PyInt_FromLong(mmatrix_param[idx]));
+
+   //mmatrix_p[idx] = mmatrix_param[idx];
+
+   for(idx=0; idx<alignmentscores_size; idx++)
+      PyList_SetItem(py_align_scores,idx,PyFloat_FromDouble(alignmentscores[idx]));
+
+   //alignscores[idx] = alignmentscores[idx];
+   
+   if (use_quality_scores) {
+      penalty_struct currentPlif;
+      int ctr=0;
+      for (size_t z=0; z<nr_paths; z++) {
+      //for (int z=0; z<nr_paths; z++) {
+         for(int estChar=1;estChar<6;estChar++) {
+            for(int dnaChar=0;dnaChar<6;dnaChar++) {
+
+               int currentPos = (estChar-1)*6+dnaChar;
+               currentPlif = qualityFeaturesAllPaths[z][currentPos];
+
+               for(int pidx=0; pidx<currentPlif.len; pidx++) {
+                  //qScores[ctr] = currentPlif.penalties[pidx];
+                  //printf("%f ",qScores[ctr]);
+                  PyList_Append(py_q_scores,PyFloat_FromDouble(currentPlif.penalties[pidx]));
+                  ctr++;
+               }
+               //printf("\n");
+      }}}
+      //printf("\nctr is %d\n",ctr);
+   }
+
+   PyObject* result = PyTuple_Pack(5,
+   py_splice_align, py_est_align, py_mmatrix,
+   py_align_scores, py_q_scores);
+
+   //PyTuple_SetItem(result,0,py_splice_align);
+   //PyTuple_SetItem(result,1,py_est_align);
+   //PyTuple_SetItem(result,2,py_mmatrix);
+   //PyTuple_SetItem(result,3,py_align_scores);
+   //PyTuple_SetItem(result,4,py_q_scores);
+
+   return result;
+}
+
+PyObject* Alignment::getAlignmentArraysNew() {
+
+   PyObject* py_dna_align = PyList_New(result_len);
+   PyObject* py_est_align = PyList_New(result_len);
+
+   for(size_t idx=0; idx<result_len; idx++) {
+      PyList_SetItem(py_dna_align,idx,PyInt_FromLong(DNA_ARRAY[idx]));
+      PyList_SetItem(py_est_align,idx,PyInt_FromLong(EST_ARRAY[idx]));
+   }
+
+   PyObject* result = PyTuple_Pack(2, py_dna_align, py_est_align);
+   return result;
+}
diff --git a/DynProg/qpalma_dp.h b/DynProg/qpalma_dp.h
new file mode 100644 (file)
index 0000000..07c5f37
--- /dev/null
@@ -0,0 +1,146 @@
+#ifndef _QPALMA_DP_H_
+#define _QPALMA_DP_H_
+
+#include "Python.h"
+
+#include "penalty_info.h"
+#include "debug_tools.h"
+
+struct ArrayElem { //24B
+  double score;
+  int prev_j;
+  int prev_i;
+  int prev_matrix_no;   
+  bool isSplice;
+} ;
+
+struct pre_score { //24B
+  double value; //8
+  int prev_i; //4
+  int prev_j; //4
+  int prev_matrix_no;//4
+  bool issplice;//4
+};
+
+typedef struct pre_score Pre_score;
+
+typedef struct Align_pair { //24B
+  double value ; 
+  bool issplice;
+  int previ,prevj ;
+  char est_char;
+  char dna_char;
+};
+
+void fill_matrix (int nr_paths, Pre_score* matrices[], int len_est, int len_dna, char* est, char* dna, double* prb, penalty_struct* functions, double* matchmatrix, penalty_struct* qualityScores, double* donor, double* acceptor, bool remove_duplicate_scores, int nr_donor_sites, int* donor_sites, int* max_score_positions);
+
+int check_char(char base);
+
+bool result_align (Pre_score* matrices[], int matrixnr, int i,  int j, int* result_length_ptr, char* est, char* dna, double* prb, double* chastity,  int* s_align, int* e_align, int* m_param, double* alignmentscores, int* max_score_positions, penalty_struct* qparam, mode currentMode);
+
+extern void print_align(Pre_score* matrix, int length_est,  int length_dna, Align_pair* vektor, int result_length, int print_matrix);
+
+
+/* 
+ * The function myalign calculates a new alignment given the scoring scheme
+ * Its input arguments are:
+ *
+ * num_path(id)               ->    an integer specifying which alignment has to be done
+ * dna                        ->    
+ * est                        ->
+ * h                          ->
+ * mmatrix                    -> 
+ * donor                      ->
+ * acceptor                   ->
+ * remove_duplicate_scores    -> a boolean
+ * print_matrix               -> a boolean
+ *
+ * [SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest] = myalign_local(...
+ *
+ * the idea of the qualityScores array is as follows
+ *
+ * consider a matrix of 24 plifs
+ * 
+ * -> row major
+ *
+ */
+
+class Alignment {
+
+   private:
+      int* splice_align;
+      int* est_align;
+      int* mmatrix_param;
+      double* alignmentscores;
+      struct penalty_struct** qualityFeaturesAllPaths;
+
+      size_t dna_len;
+      size_t est_len;
+      size_t mlen;
+      size_t nr_paths;
+
+      size_t result_len;
+      int* DNA_ARRAY;
+      int* EST_ARRAY;
+
+      int numQualSuppPoints;
+      int numPlifs;
+      bool use_quality_scores;
+
+      uint splice_align_size ;
+      uint est_align_size ;
+      uint mmatrix_param_size ;
+      uint alignmentscores_size ;
+      uint numPathsPlifs ;
+
+      INT len;
+      REAL *limits;
+      REAL *penalties;
+      INT max_len;
+      INT min_len;
+      REAL *cache;
+      enum ETransformType transform ;
+      INT id;
+      char * name;
+      INT use_svm;
+
+   public:
+      Alignment(int numQPlifs ,int numq, bool use_qscores);
+      ~Alignment() {
+         if(splice_align != 0)
+            delete[] splice_align;
+
+         if(est_align != 0)
+            delete[] est_align;
+
+         if(mmatrix_param != 0)
+            delete[] mmatrix_param;
+
+         if(alignmentscores != 0)
+            delete[] alignmentscores;
+
+         if(qualityFeaturesAllPaths != 0)
+            delete[] qualityFeaturesAllPaths;
+      }
+
+      void myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
+      int est_len_p, double* prb, double* chastity, struct penalty_struct h, 
+      double* matchmatrix, int mm_len, double* donor, int d_len, double* acceptor,
+      int a_len, struct penalty_struct* qualityScores, bool remove_duplicate_scores,
+      bool print_matrix);
+
+      void getDNAEST();
+
+      int getResultLength() { return result_len; }
+
+      void getAlignmentResults(int* s_align, int* e_align,
+      int* mmatrix_p, double* alignscores, double* qScores);
+    
+      void getAlignmentArrays(int* dna_align, int* est_align);
+
+
+      PyObject* getAlignmentResultsNew();
+      PyObject* getAlignmentArraysNew();
+};
+
+#endif  // _QPALMA_DP_H_
diff --git a/DynProg/result_align.cpp b/DynProg/result_align.cpp
new file mode 100644 (file)
index 0000000..2386360
--- /dev/null
@@ -0,0 +1,251 @@
+#include "assert.h"
+#include "fill_matrix.h"
+#include "debug_tools.h"
+
+using namespace std;
+
+void increaseFeatureCount(penalty_struct* qparam, int dnaChar, int estChar, double estprb) {
+
+   FA(estChar > 0 && estChar < 6);
+   FA(dnaChar > -1 && dnaChar < 6);
+
+   int currentPos = (estChar-1)*6+dnaChar;
+
+   penalty_struct currentStruct = qparam[currentPos];
+
+   //printf("Current index %d est/dna: %d %d score %f\n",currentPos,estChar,dnaChar,estprb);
+
+   //if( estChar == 1 and dnaChar == 1) {
+   //   printf("before\n");
+   //   int p_idx;
+   //   for(p_idx=0;p_idx<currentStruct.len;p_idx++) {
+   //      printf("%f ",currentStruct.limits[p_idx]);
+   //   }
+   //   printf("\n");
+
+   //   for(p_idx=0;p_idx<currentStruct.len;p_idx++) {
+   //      printf("%f ",currentStruct.penalties[p_idx]);
+   //   }
+   //   printf("\n");
+   //}
+
+   double value = estprb;
+   int Lower = 0;
+   int idx;
+
+   for (idx=0;idx<currentStruct.len;idx++) {
+      if (currentStruct.limits[idx] <= value)
+         Lower++;
+   }
+
+   if (Lower == 0) {
+         currentStruct.penalties[0] += 1;
+         qparam[currentPos] = currentStruct;
+         return;
+   }
+
+   if (Lower == currentStruct.len) {
+         currentStruct.penalties[currentStruct.len-1] += 1;
+         qparam[currentPos] = currentStruct;
+         return;
+   }
+
+   Lower -= 1;
+   int Upper = Lower+1; // x-werte bleiben fest
+
+   double weightup  = 1.0*(value - currentStruct.limits[Lower]) / (currentStruct.limits[Upper] - currentStruct.limits[Lower]);
+   double weightlow = 1.0*(currentStruct.limits[Upper] - value) / (currentStruct.limits[Upper] - currentStruct.limits[Lower]);
+   currentStruct.penalties[Upper] += weightup;
+   currentStruct.penalties[Lower] += weightlow; 
+
+   //if( estChar == 1 and dnaChar == 1) {
+   //   printf("estprb/Lower/Upper %f %d %d\n",estprb,Lower,Upper);
+   //   int p_idx;
+   //   printf("after\n");
+   //   for(p_idx=0;p_idx<currentStruct.len;p_idx++) {
+   //      printf("%f ",currentStruct.limits[p_idx]);
+   //   }
+   //   printf("\n");
+   //   for(p_idx=0;p_idx<currentStruct.len;p_idx++)
+   //      printf("%f ",currentStruct.penalties[p_idx]);
+   //   printf("\n");
+   //}
+   qparam[currentPos] = currentStruct;
+}
+
+bool result_align(Pre_score* matrices[], int z, int est_len, int dna_len, int* result_length_ptr, char* est, char* dna, double* prb, double* chastity, int* s_align, int* e_align, int* mparam, double* alignmentscores, int* max_score_positions, penalty_struct* qparam, mode currentMode)
+//Gibt 1 zurueck, wenn kein weiterer Pfad existiert, sonst 0
+
+{  
+  const int mlen=6; // length of matchmatrix
+  int startend[4] ; // [i_start, j_start, i_end, j_end] ;
+  startend[3] = max_score_positions[2*z] ; //i (est)
+  startend[4] = max_score_positions[2*z +1] ; //j (dna)
+  //printf("in result_align: z=%d:(%d, %d)\n", z, startend[3], startend[4]) ;
+  int dnanum ; //using check_char: letter -> number
+  int estnum ; //using check_char: letter -> number
+
+  double prbnum ;
+  double chastitynum ;
+
+  int dna_pos ;
+  int est_pos ;
+  int splice_state ;
+  int est_state ;
+  for (dna_pos=dna_len-1; dna_pos>startend[4]; dna_pos--) {
+    s_align[dna_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
+  }
+  for (est_pos=est_len-1; est_pos>startend[3]; est_pos--) {
+    e_align[est_pos-1] = 4 ;
+  }
+  //splice_align: 0: exon, 1: donor, 2: acceptor, 3: intro,. 4: dangling end
+
+  /* starting point for backtracking */
+  int i = startend[3] ; 
+  int j = startend[4] ;
+  int act_m_no  = z ;
+  alignmentscores[z] = ((Pre_score*)matrices[z] + i*dna_len +j)->value;
+  //printf("alignment_score for z=%d: %3.8f\n", z, alignmentscores[z]) ;
+
+  int prev_i    = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_i; 
+  int prev_j    = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_j;
+  int prev_m_no = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_matrix_no;
+
+  //printf("Current z is %d\n",z);
+
+  if(!(isfinite(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value)))
+  {
+    cout << "No additional path left (inf)!\n";
+    return 1;
+  }
+
+  if(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value==0) {
+    cout << "No additional path left (0)!\n";
+    return 1;
+  }
+
+  assert(dna_pos == j) ;
+  assert(est_pos == i) ;
+
+  splice_state = 0 ; //exon
+  est_state = 1 ; //[112222111 ... for exons of length 2,4,3 ...
+
+  //compute length of alignment (between max(m,n) and m+n)     
+  //local_alignment: backtracking to first Zero: look at actual value
+  while(!(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value)==0.0) {
+    
+    //just some info
+    //if ((z>0) && (!(z==prev_m_no))) {
+    //  printf("(%d,%d): next step in next matrix\n", i,j) ;
+    //}
+
+    if ((prev_i == (i-1)) && (prev_j == (j-1))) { // match or mismatch
+      (*result_length_ptr)= (*result_length_ptr) + 1;
+      
+      s_align[dna_pos-1] = splice_state; 
+      dna_pos-- ;
+      e_align[est_pos-1] = est_state ; //1 or 2, depended
+      est_pos-- ;
+      
+      dnanum = check_char(dna[j-1]) ; //-1 because index starts with 0
+      estnum = check_char(est[i-1]) ; //-1 because index starts with 0
+
+      prbnum = prb[i-1];
+      chastitynum = chastity[i-1];
+
+      //printf("Position %d,%d est,dna: %d,%d\n",i,j,estnum,dnanum);
+
+      if(currentMode == USE_QUALITY_SCORES)
+         increaseFeatureCount(qparam,dnanum,estnum,prbnum);
+      else
+         mparam[mlen*dnanum+estnum] ++ ;
+
+    }
+    else if ((prev_i == (i)) && (prev_j == (j-1))) {// gap on EST
+      (*result_length_ptr)= (*result_length_ptr) + 1;
+      
+      s_align[dna_pos-1] = splice_state; 
+      dna_pos-- ;
+
+      dnanum = check_char(dna[j-1]) ; //-1 because index starts with 0
+      estnum = 0 ; //gap
+
+      if(currentMode == USE_QUALITY_SCORES)
+         mparam[dnanum] ++ ;
+      else
+         mparam[mlen*dnanum +estnum] ++ ;
+
+    }
+    else if ((prev_i == (i-1)) && (prev_j == (j))) { // gap on DNA
+      (*result_length_ptr)= (*result_length_ptr) + 1;
+      
+      e_align[est_pos-1] = est_state ;  //1 or 2, depended
+      est_pos-- ;
+
+      dnanum = 0 ; //gap
+      estnum = check_char(est[i-1]) ; //-1 because index starts with 0
+
+      prbnum = prb[i-1];
+      chastitynum = chastity[i-1];
+
+      //printf("Position %d,%d est,dna: %d,%d\n",i,j,estnum,dnanum);
+
+      if(currentMode == USE_QUALITY_SCORES)
+         increaseFeatureCount(qparam,dnanum,estnum,prbnum);
+      else
+         mparam[mlen*dnanum +estnum] ++ ;
+    }
+    else {// splice site
+      (*result_length_ptr) =  (*result_length_ptr) + (j - prev_j);
+      if (((Pre_score*)matrices[act_m_no] + i*dna_len + j)->issplice==0) {
+       printf("matrices[act_m_no](i,j): %3.15f\n",((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value) ;
+       printf("matrices[prev_m_no](i,j): %3.15f\n",((Pre_score*)matrices[prev_m_no] + i*dna_len +j)->value) ;
+       printf("issplice(act): %d\n",((Pre_score*)matrices[act_m_no] + i*dna_len + j)->issplice) ;
+       printf("issplice(prev): %d\n",((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->issplice) ;
+       printf("act_m_no: %d, prev_m_no: %d\n", act_m_no, prev_m_no) ;
+       printf("j: %d -> prev_j:%d\n", j, prev_j) ;
+       printf("i: %d -> prev_i:%d\n", i, prev_i) ;
+      }
+      if (est_state==1) { //was exon labeled "1"
+       est_state = 2 ; //new exon is labeled "2"
+      }
+      else {
+       est_state = 1 ; //last exon was labeled "2", new exon is labeled "1"
+      }
+      for (j; j > prev_j; j--) {
+       if (splice_state == 0) { //coming from exon
+         splice_state = 2 ; //first intron_state: acceptor
+       } else {
+         splice_state = 3 ; //intron
+       }
+       if (j-1 == prev_j) { //last intron_state: donor
+         splice_state = 1 ;//donor
+       }
+       s_align[dna_pos-1] = splice_state; 
+       dna_pos-- ;
+      }
+      splice_state = 0 ; //exon again
+      
+    }
+   
+    startend[1] = i;
+    startend[2] = j;
+    i = prev_i;
+    j = prev_j;
+    act_m_no = prev_m_no ;
+    
+    prev_i    = ((Pre_score*)matrices[act_m_no] + i*dna_len + j)->prev_i; 
+    prev_j    = ((Pre_score*)matrices[act_m_no] + i*dna_len + j)->prev_j; 
+    prev_m_no = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_matrix_no;
+  }
+  for (dna_pos; dna_pos>0; dna_pos--) {
+    s_align[dna_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
+  }
+  for (est_pos; est_pos>0; est_pos--) {
+    e_align[est_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
+  }
+
+  //printf("(%d, %d) --> (%d, %d)\n", startend[1], startend[2], startend[3], startend[4]) ;
+  //printf("result_length: %d\n", *result_length_ptr) ;
+  return 0;
+}
diff --git a/DynProg/test_fill_matrix.cpp b/DynProg/test_fill_matrix.cpp
new file mode 100644 (file)
index 0000000..b6d47ae
--- /dev/null
@@ -0,0 +1,178 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <fstream>
+#include <sstream>
+#include <ctime>
+#include <sys/timeb.h>  
+using namespace std;
+
+#include "qpalma_dp.h"
+
+void initialize_quality_plifs(penalty_struct* qplifs, double* params) {
+
+   printf("begin of initialize_quality_plifs \n");
+   double limits[10] = {-5.0, 0.0, 5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0};
+
+   int idx, pos;
+   int ctr = 36;
+
+   for(idx=0;idx<30;idx++) {
+      penalty_struct new_plif;
+      init_penalty_struct(new_plif);
+   
+      new_plif.min_len = -5;
+      new_plif.max_len = 40;
+      new_plif.len = 10;
+
+      new_plif.limits    = (double*) malloc(sizeof(double)*10);
+      if (new_plif.limits == NULL)
+         printf("malloc (limits)\n");
+
+      new_plif.penalties = (double*) malloc(sizeof(double)*10);
+      if (new_plif.penalties == NULL)
+         printf("malloc (penalties)\n");
+
+      for(pos=0;pos<10;pos++) {
+         new_plif.limits[pos]     = limits[pos];
+         new_plif.penalties[pos]  = params[ctr];
+         ctr++;
+      }
+
+      qplifs[idx] = new_plif;
+   }
+   
+   printf("end of initialize_quality_plifs \n");
+   return;
+}
+
+
+int main(int argc, char** argv){
+
+   printf("Starting test_fm\n");
+   int idx;
+   char* alphabet = (char*) malloc(sizeof(char)*4);
+
+   alphabet[0] = 'a';
+   alphabet[1] = 'c';
+   alphabet[2] = 'g';
+   alphabet[3] = 't';
+
+   int est_len =  36;
+   int dna_len = 10000;
+   int nr_paths = 2;
+
+   int nr_donor_sites = 0 ;
+   int d_len = dna_len;
+
+   printf("begin of donor/acceptor stuff\n");
+   double* donor     = (double*) malloc(sizeof(double)*dna_len);
+   double* acceptor  = (double*) malloc(sizeof(double)*dna_len);
+
+   for(idx=0;idx<dna_len;idx++) {
+      donor[idx]     = drand48();
+      acceptor[idx]  = drand48();
+   }
+
+   for (int ii=0; ii<d_len; ii++) {
+    if(isfinite(donor[ii])) {
+      nr_donor_sites++ ;
+    }
+   }
+
+   int* donor_sites = new int[nr_donor_sites];
+   int donor_idx = 0 ;
+   for (int ii=0; ii<d_len; ii++) {
+    if(isfinite(donor[ii])) {
+      donor_sites[donor_idx] = ii+1 ;
+      donor_idx++ ;
+    }  
+   }
+
+   printf("end of donor/acceptor stuff\n");
+
+   Pre_score** matrices = new Pre_score*[nr_paths];
+   for (int z=0; z<nr_paths; z++)
+      matrices[z] = new Pre_score[dna_len * est_len];
+
+   char* est = (char*) malloc(sizeof(char)*est_len);
+   char* dna = (char*) malloc(sizeof(char)*dna_len);
+
+   for(idx=0;idx<est_len;idx++)
+      est[idx] = alphabet[lrand48()%4];
+
+   for(idx=0;idx<dna_len;idx++)
+      dna[idx] = alphabet[lrand48()%4];
+      
+   double* prb = (double*) malloc(sizeof(double)*est_len);
+
+   for(idx=0;idx<est_len;idx++)
+      prb[idx] = lrand48() % 45;
+
+   double* matchmatrix = (double*) malloc(sizeof(double)*6);
+   matchmatrix[0] = 0;
+   matchmatrix[1] = -1;
+   matchmatrix[2] = -1;
+   matchmatrix[3] = -1;
+   matchmatrix[4] = -1;
+   matchmatrix[5] = 0;
+
+   REAL limits_array[10] = {19.999999999999993, 33.362010744001154, 55.651188044142465, 92.831776672255558, 154.85273653622525, 258.30993300297655, 430.88693800637651, 718.76273276092525, 1198.968500637882, 1999.9999999999982};
+   REAL pen_array[10] = {0.024414829630412388, 0.025836097651978609, 0.17473437525993854, 0.46902269254060858, -0.14383509580738071, -0.20766155250201096, -0.32734767088988909, -0.18507220858585502, -0.097862557903448902, -0.034886887426478781};
+
+   penalty_struct functions;
+
+   init_penalty_struct(functions);
+
+   functions.min_len = 20;
+   functions.max_len = 2000;
+   functions.len = 10;
+   functions.limits = &limits_array[0];
+   functions.penalties = &pen_array[0];
+
+   penalty_struct* qualityScores = (penalty_struct*) malloc(sizeof(penalty_struct)*30);
+
+   int num_params = 336;
+   double* params = (double*) malloc(sizeof(double)*num_params);
+
+   double parameter;
+   int ctr = 0;
+
+   string s;
+   ifstream infile("param.txt");
+
+   if (infile.fail()) 
+     printf("Could not open file param.txt for reading.\n");
+
+   while (getline(infile, s)) {
+      if (s.length() == 0 || s[0] == '#') continue;
+
+      istringstream iss(s);
+      iss >> parameter;
+      params[ctr] = parameter;
+      ctr++;
+   }
+
+   infile.close();
+   
+   initialize_quality_plifs(qualityScores,params);
+
+   bool remove_duplicate_scores = true;
+   int* max_score_positions = new int[nr_paths*2];
+   mode currentMode = USE_QUALITY_SCORES;
+
+   //time_t startTime;
+   //time_t endTime;
+
+   printf("calling fill_matrix\n");
+
+   //time(&startTime);
+
+   for(idx=0;idx<1;idx++)
+      fill_matrix(nr_paths, matrices, est_len, dna_len, est, dna, prb, &functions, matchmatrix, qualityScores, donor, acceptor, remove_duplicate_scores, nr_donor_sites, donor_sites, max_score_positions);
+
+   //time(&endTime);
+
+   //printf ("Scan time: %6.3f\n", difftime(endTime,startTime));
+
+   printf("End of test_fm \n");
+}
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644 (file)
index 0000000..d6c12e3
--- /dev/null
@@ -0,0 +1,5 @@
+recursive-include ParaParser *.cpp *.h *.i Makefile
+recursive-include DynProg *.cpp *.h *.i Makefile
+recursive-include doc *.tex Makefile
+include test.conf
+
diff --git a/README b/README
new file mode 100644 (file)
index 0000000..20cbdbd
--- /dev/null
+++ b/README
@@ -0,0 +1,34 @@
+Installation instructions for QPalma Version 1.0.
+  
+The package requires version 2.5 or newer of Python, and is built from 
+source, so the header files and libraries for Python must be installed, 
+as well as the core binaries.
+
+
+Instructions
+------------
+
+Type
+
+   python setup.py build
+
+to build the necessary C++ extension modules and copy the QPalma python code.
+
+
+
+
+
+
+
+
+Troubleshooting
+---------------
+
+
+
+HELP!
+-----
+
+If however everything failes contact:
+
+fabio@tuebingen.mpg.de
index a6e07e5..d264a06 100644 (file)
@@ -217,11 +217,11 @@ prediction of one site is a real value.
 
 Dependencies so far
 
-SWIG
-numpy
-pythongrid
-genome_utils
-Genefinding
+SWIG
+numpy
+pythongrid
+- Genefinding doIntervalQuery
+
 
 \begin{thebibliography}{1}
 
diff --git a/dyn_prog/Makefile b/dyn_prog/Makefile
deleted file mode 100644 (file)
index 3a8afef..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-SRCS= Mathmatics.cpp\
-               fill_matrix.cpp\
-               qpalma_dp.cpp\
-               result_align.cpp\
-               debug_tools.cpp\
-               penalty_info.cpp\
-               print_align.cpp\
-               io.cpp
-
-HDRS= Mathmatics.h\
-               common.h\
-               config.h\
-               debug_tools.h\
-               fill_matrix.h\
-               io.h\
-               penalty_info.h\
-               qpalma_dp.h
-
-OBJS = $(SRCS:%.cpp=%.o)
-
-#CXXFLAGS=-O3 -fPIC -g -ggdb 
-CXXFLAGS=-Wall -Wshadow -std=c++98 -ggdb -O3 -fPIC -I/usr/include/python2.5
-LDFLAGS=-lprofiler -L/fml/ag-raetsch/home/fabio/own_libs/libunwind/lib -lunwind-x86_64 -lunwind
-
-
-IF=QPalmaDP
-
-all: $(OBJS) $(HDRS)
-       @ swig -c++ -python ${IF}.i
-       @ g++ $(CXXFLAGS) -I/usr/include/python2.5 -c ${IF}_wrap.cxx -o ${IF}_wrap.o
-       @ g++ $(CXXFLAGS) -shared -lpython2.5 $(OBJS) ${IF}_wrap.o -o _${IF}.so
-       @ python -c "import ${IF}"
-
-test: $(OBJS) $(HDRS) 
-       g++ $(CXXFLAGS) $(LDFLAGS) -o test_fm  debug_tools.o Mathmatics.o io.o penalty_info.o fill_matrix.o test_fill_matrix.cpp 
-
-clean:
-       @ rm *.o *.so *.cxx ${IF}.py ${IF}.pyc
diff --git a/dyn_prog/Mathmatics.cpp b/dyn_prog/Mathmatics.cpp
deleted file mode 100644 (file)
index 61298da..0000000
+++ /dev/null
@@ -1,349 +0,0 @@
-// Math.cpp: implementation of the CMath class.
-//
-//////////////////////////////////////////////////////////////////////
-
-
-#include "Mathmatics.h"
-#include "io.h"
-
-#include <sys/time.h>
-#include <sys/types.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-#include <unistd.h>
-#include <assert.h>
-
-//////////////////////////////////////////////////////////////////////
-// Construction/Destruction
-//////////////////////////////////////////////////////////////////////
-
-#ifdef USE_LOGCACHE
-//gene/math specific constants
-#ifdef USE_HMMDEBUG
-#define MAX_LOG_TABLE_SIZE 10*1024*1024
-#define LOG_TABLE_PRECISION 1e-6
-#else
-#define MAX_LOG_TABLE_SIZE 123*1024*1024
-#define LOG_TABLE_PRECISION 1e-15
-#endif
-
-INT CMath::LOGACCURACY         = 0; // 100000 steps per integer
-#endif
-
-INT CMath::LOGRANGE            = 0; // range for logtable: log(1+exp(x))  -25 <= x <= 0
-
-#ifdef USE_PATHDEBUG
-const REAL CMath::INFTY            =  1e11;            // infinity
-#else
-const REAL CMath::INFTY            =  -log(0.0);       // infinity
-#endif
-const REAL CMath::ALMOST_NEG_INFTY =  -1000;   
-
-CHAR CMath::rand_state[256];
-
-CMath::CMath()
-{
-       struct timeval tv;
-       gettimeofday(&tv, NULL);
-       UINT seed=(UINT) (4223517*getpid()*tv.tv_sec*tv.tv_usec);
-       initstate(seed, CMath::rand_state, sizeof(CMath::rand_state));
-       CIO::message(M_INFO, "seeding random number generator with %u\n", seed);
-
-#ifdef USE_LOGCACHE
-    LOGRANGE=CMath::determine_logrange();
-    LOGACCURACY=CMath::determine_logaccuracy(LOGRANGE);
-    CIO::message(M_INFO, "Initializing log-table (size=%i*%i*%i=%2.1fMB) ...",LOGRANGE,LOGACCURACY,sizeof(REAL),LOGRANGE*LOGACCURACY*sizeof(REAL)/(1024.0*1024.0)) ;
-   
-    CMath::logtable=new REAL[LOGRANGE*LOGACCURACY];
-    init_log_table();
-    CIO::message(M_INFO, "Done.\n") ;
-#else
-       INT i=0;
-       while ((REAL)log(1+((REAL)exp(-REAL(i)))))
-               i++;
-    CIO::message(M_INFO, "determined range for x in log(1+exp(-x)) is:%d\n", i);
-       LOGRANGE=i;
-#endif 
-}
-
-CMath::~CMath()
-{
-#ifdef USE_LOGCACHE
-       delete[] logtable;
-#endif
-}
-
-#ifdef USE_LOGCACHE
-INT CMath::determine_logrange()
-{
-    INT i;
-    REAL acc=0;
-    for (i=0; i<50; i++)
-    {
-       acc=((REAL)log(1+((REAL)exp(-REAL(i)))));
-       if (acc<=(REAL)LOG_TABLE_PRECISION)
-           break;
-    }
-
-    CIO::message(M_INFO, "determined range for x in table log(1+exp(-x)) is:%d (error:%G)\n",i,acc);
-    return i;
-}
-
-INT CMath::determine_logaccuracy(INT range)
-{
-    range=MAX_LOG_TABLE_SIZE/range/((int)sizeof(REAL));
-    CIO::message(M_INFO, "determined accuracy for x in table log(1+exp(-x)) is:%d (error:%G)\n",range,1.0/(double) range);
-    return range;
-}
-
-//init log table of form log(1+exp(x))
-void CMath::init_log_table()
-{
-  for (INT i=0; i< LOGACCURACY*LOGRANGE; i++)
-    logtable[i]=log(1+exp(REAL(-i)/REAL(LOGACCURACY)));
-}
-#endif
-
-void CMath::sort(INT *a, INT cols, INT sort_col)
-{
-  INT changed=1;
-  if (a[0]==-1) return ;
-  while (changed)
-  {
-      changed=0; INT i=0 ;
-      while ((a[(i+1)*cols]!=-1) && (a[(i+1)*cols+1]!=-1)) // to be sure
-         {
-                 if (a[i*cols+sort_col]>a[(i+1)*cols+sort_col])
-                 {
-                         for (INT j=0; j<cols; j++)
-                                 CMath::swap(a[i*cols+j],a[(i+1)*cols+j]) ;
-                         changed=1 ;
-                 } ;
-                 i++ ;
-         } ;
-  } ;
-} 
-
-void CMath::sort(REAL *a, INT* idx, INT N) 
-{
-
-       INT changed=1;
-       while (changed)
-       {
-               changed=0;
-               for (INT i=0; i<N-1; i++)
-               {
-                       if (a[i]>a[i+1])
-                       {
-                               swap(a[i],a[i+1]) ;
-                               swap(idx[i],idx[i+1]) ;
-                               changed=1 ;
-                       } ;
-               } ;
-       } ;
-        
-} 
-
-
-
-//plot x- axis false positives (fp) 1-Specificity
-//plot y- axis true positives (tp) Sensitivity
-INT CMath::calcroc(REAL* fp, REAL* tp, REAL* output, INT* label, INT& size, INT& possize, INT& negsize, REAL& tresh, FILE* rocfile)
-{
-       INT left=0;
-       INT right=size-1;
-       INT i;
-
-       for (i=0; i<size; i++)
-       {
-               if (!(label[i]== -1 || label[i]==1))
-                       return -1;
-       }
-
-       //sort data such that -1 labels first +1 behind
-       while (left<right)
-       {
-               while ((label[left] < 0) && (left<right))
-                       left++;
-               while ((label[right] > 0) && (left<right))      //warning: label must be either -1 or +1
-                       right--;
-
-               swap(output[left],output[right]);
-               swap(label[left], label[right]);
-       }
-
-       // neg/pos sizes
-       negsize=left;
-       possize=size-left;
-       REAL* negout=output;
-       REAL* posout=&output[left];
-
-       // sort the pos and neg outputs separately
-       qsort(negout, negsize);
-       qsort(posout, possize);
-
-       // set minimum+maximum values for decision-treshhold
-       REAL minimum=min(negout[0], posout[0]);
-       REAL maximum=minimum;
-       if (negsize>0)
-               maximum=max(maximum,negout[negsize-1]);
-       if (possize>0)
-               maximum=max(maximum,posout[possize-1]);
-
-       REAL treshhold=minimum;
-       REAL old_treshhold=treshhold;
-
-       //clear array.
-       for (i=0; i<size; i++)
-       {
-               fp[i]=1.0;
-               tp[i]=1.0;
-       }
-
-       //start with fp=1.0 tp=1.0 which is posidx=0, negidx=0
-       //everything right of {pos,neg}idx is considered to beLONG to +1
-       INT posidx=0;
-       INT negidx=0;
-       INT iteration=1;
-       INT returnidx=-1;
-
-       REAL minerr=10;
-
-       while (iteration < size && treshhold<=maximum)
-       {
-               old_treshhold=treshhold;
-
-               while (treshhold==old_treshhold && treshhold<=maximum)
-               {
-                       if (posidx<possize && negidx<negsize)
-                       {
-                               if (posout[posidx]<negout[negidx])
-                               {
-                                       if (posout[posidx]==treshhold)
-                                               posidx++;
-                                       else
-                                               treshhold=posout[posidx];
-                               }
-                               else
-                               {
-                                       if (negout[negidx]==treshhold)
-                                               negidx++;
-                                       else
-                                               treshhold=negout[negidx];
-                               }
-                       }
-                       else
-                       {
-                               if (posidx>=possize && negidx<negsize-1)
-                               {
-                                       negidx++;
-                                       treshhold=negout[negidx];
-                               }
-                               else if (negidx>=negsize && posidx<possize-1)
-                               {
-                                       posidx++;
-                                       treshhold=posout[posidx];
-                               }
-                               else if (negidx<negsize && treshhold!=negout[negidx])
-                                       treshhold=negout[negidx];
-                               else if (posidx<possize && treshhold!=posout[posidx])
-                                       treshhold=posout[posidx];
-                               else
-                               {
-                                       treshhold=2*(maximum+100); // force termination
-                                       posidx=possize;
-                                       negidx=negsize;
-                                       break;
-                               }
-                       }
-               }
-
-               //calc tp,fp
-               tp[iteration]=(possize-posidx)/(REAL (possize));
-               fp[iteration]=(negsize-negidx)/(REAL (negsize));
-
-               //choose poINT with minimal error
-               if (minerr > negsize*fp[iteration]/size+(1-tp[iteration])*possize/size )
-               {
-                       minerr=negsize*fp[iteration]/size+(1-tp[iteration])*possize/size;
-                       tresh=(old_treshhold+treshhold)/2;
-                       returnidx=iteration;
-               }
-
-               iteration++;
-       }
-
-       // set new size
-       size=iteration;
-
-       if (rocfile)
-       {
-               const CHAR id[]="ROC";
-               fwrite(id, sizeof(char), sizeof(id), rocfile);
-               fwrite(fp, sizeof(REAL), size, rocfile);
-               fwrite(tp, sizeof(REAL), size, rocfile);
-       }
-
-       return returnidx;
-}
-
-UINT CMath::crc32(BYTE *data, INT len)
-{
-    UINT        result;
-    INT                 i,j;
-    BYTE       octet;
-
-    result = 0-1;
-
-    for (i=0; i<len; i++)
-    {
-       octet = *(data++);
-       for (j=0; j<8; j++)
-       {
-           if ((octet >> 7) ^ (result >> 31))
-           {
-               result = (result << 1) ^ 0x04c11db7;
-           }
-           else
-           {
-               result = (result << 1);
-           }
-           octet <<= 1;
-       }
-    }
-
-    return ~result; 
-}
-
-double CMath::mutual_info(REAL* p1, REAL* p2, INT len)
-{
-       double e=0;
-
-       for (INT i=0; i<len; i++)
-               for (INT j=0; j<len; j++)
-                       e+=exp(p2[j*len+i])*(p2[j*len+i]-p1[i]-p1[j]);
-
-       return e;
-}
-
-double CMath::relative_entropy(REAL* p, REAL* q, INT len)
-{
-       double e=0;
-
-       for (INT i=0; i<len; i++)
-               e+=exp(p[i])*(p[i]-q[i]);
-
-       return e;
-}
-
-double CMath::entropy(REAL* p, INT len)
-{
-       double e=0;
-
-       for (INT i=0; i<len; i++)
-               e-=exp(p[i])*p[i];
-
-       return e;
-}
diff --git a/dyn_prog/Mathmatics.h b/dyn_prog/Mathmatics.h
deleted file mode 100644 (file)
index 2e24d48..0000000
+++ /dev/null
@@ -1,546 +0,0 @@
-#ifndef __MATHMATICS_H_
-#define __MATHMATICS_H_
-
-#include "common.h"
-#include "io.h"
-
-#include <math.h>
-#include <stdio.h>
-
-//define finite for win32
-#ifdef _WIN32
-#include <float.h>
-#define finite _finite
-#define isnan _isnan
-#endif
-
-#ifndef NAN
-#include <stdlib.h>
-#define NAN (strtod("NAN",NULL))
-#endif
-
-#ifdef SUNOS
-extern "C" int finite(double);
-#endif
-
-/** Mathematical Functions.
- * Class which collects generic mathematical functions
- */
-class CMath  
-{
-public:
-       /**@name Constructor/Destructor.
-        */
-       //@{
-       ///Constructor - initializes log-table
-       CMath();
-
-       ///Destructor - frees logtable
-       virtual ~CMath();
-       //@}
-
-       /**@name min/max/abs functions.
-       */
-       //@{
-
-       ///return the minimum of two integers
-       //
-       template <class T>
-       static inline T min(T a, T b)
-       {
-               return ((a)<=(b))?(a):(b);
-       }
-
-       ///return the maximum of two integers
-       template <class T>
-       static inline T max(T a, T b) 
-       {
-               return ((a)>=(b))?(a):(b);
-       }
-
-       ///return the maximum of two integers
-       template <class T>
-       static inline T abs(T a)
-       {
-               return ((a)>=(0))?(a):(-a);
-       }
-       //@}
-
-       /**@name misc functions */
-       //@{
-
-       /// crc32
-       static UINT crc32(BYTE *data, INT len);
-
-       static inline REAL round(REAL d)
-       {
-               return floor(d+0.5);
-       }
-
-       /// signum of type T variable a 
-       template <class T>
-       static inline REAL sign(REAL a)
-       {
-               if (a==0)
-                       return 0;
-               else return (a<0) ? (-1) : (+1);
-       }
-
-       /// swap floats a and b
-       template <class T>
-       static inline void swap(T & a,T &b)
-       {
-               T c=a ;
-               a=b; b=c ;
-       }
-
-       /// x^2
-       template <class T>
-       static inline T sq(T x)
-       {
-               return x*x;
-       }
-
-       static inline LONG factorial(INT n)
-       {
-               LONG res=1 ;
-               for (int i=2; i<=n; i++)
-                       res*=i ;
-               return res ;
-       }
-
-       static inline LONG nchoosek(INT n, INT k)
-       {
-               long res=1 ;
-
-               for (INT i=n-k+1; i<=n; i++)
-                       res*=i ;
-
-               return res/factorial(k) ;
-       }
-
-       /** performs a bubblesort on a given matrix a.
-        * it is sorted from in ascending order from top to bottom
-        * and left to right */
-       static void sort(INT *a, INT cols, INT sort_col=0) ;
-       static void sort(REAL *a, INT*idx, INT N) ;
-       
-       /** performs a quicksort on an array output of length size
-        * it is sorted from in ascending (for type T) */
-       template <class T>
-       static void qsort(T* output, INT size) ;
-
-       /** performs a quicksort on an array output of length size
-        * it is sorted from in ascending order 
-        * (for type T1) and returns the index (type T2)
-        * matlab alike [sorted,index]=sort(output) 
-        */
-       template <class T1,class T2>
-       static void qsort(T1* output, T2* index, INT size);
-
-       /** performs a quicksort on an array output of length size
-        * it is sorted from in ascending order
-        * (for type T1) and returns the index (type T2)
-        * matlab alike [sorted,index]=sort(output) 
-        */
-       template <class T1,class T2>
-       static void qsort_backward(T1* output, T2* index, INT size);
-
-     /* finds the smallest element in output and puts that element as the 
-               first element  */
-       template <class T>
-       static void min(REAL* output, T* index, INT size);
-
-     /* finds the n smallest elements in output and puts these elements as the 
-               first n elements  */
-       template <class T>
-       static void nmin(REAL* output, T* index, INT size, INT n);
-
-       /* performs a inplace unique of a vector of type T using quicksort 
-        * returns the new number of elements */
-       template <class T>
-       static INT unique(T* output, INT size) 
-       {
-               qsort(output, size) ;
-               INT i,j=0 ;
-               for (i=0; i<size; i++)
-                       if (i==0 || output[i]!=output[i-1])
-                               output[j++]=output[i] ;
-               return j ;
-       }
-
-       /* finds an element in a sorted array via binary search
-     * returns -1 if not found */
-       static inline INT fast_find(WORD* output, INT size, WORD elem)
-       {
-               INT start=0, end=size-1, middle=size/2 ;
-
-               if (output[start]>elem || output[end]<elem)
-                       return -1 ;
-
-               while (1)
-               {
-                       if (output[middle]>elem)
-                       {
-                               end = middle ;
-                               middle=start+(end-start)/2 ;
-                       } ;
-                       if (output[middle]<elem)
-                       {
-                               start = middle ;
-                               middle=start+(end-start)/2 ;
-                       }
-                       if (output[middle]==elem)
-                               return middle ;
-                       if (end-start<=1)
-                       {
-                               if (output[start]==elem)
-                                       return start ;
-                               if (output[end]==elem)
-                                       return end ;
-                               return -1 ;
-                       }
-               }
-       }
-
-       /* finds an element in a sorted array via binary search
-        * that is smaller-equal elem und the next element is larger-equal  */
-       static inline INT fast_find_range(REAL* output, INT size, REAL elem)
-       {
-               INT start=0, end=size-2, middle=(size-2)/2 ;
-
-               if (output[start]>elem)
-                       return -1 ;
-               if (output[end]<=elem)
-                       return size-1 ;
-
-               while (1)
-               {
-                       if (output[middle]>elem)
-                       {
-                               end = middle ;
-                               middle=start+(end-start)/2 ;
-                       } ;
-                       if (output[middle]<elem)
-                       {
-                               start = middle ;
-                               middle=start+(end-start)/2 ;
-                       }
-                       if (output[middle]<=elem && output[middle+1]>=elem)
-                               return middle ;
-                       if (end-start<=1)
-                       {
-                               if (output[start]<=elem && output[start+1]>=elem)
-                                       return start ;
-                               return end ;
-                       }
-               }
-       }
-
-       /** calculates ROC into (fp,tp)
-        * from output and label of length size 
-        * returns index with smallest error=fp+fn
-        */
-       static INT calcroc(REAL* fp, REAL* tp, REAL* output, INT* label, INT& size, INT& possize, INT& negsize, REAL& tresh, FILE* rocfile);
-       //@}
-
-       /// returns the mutual information of p which is given in logspace
-       /// where p,q are given in logspace
-       static double mutual_info(REAL* p1, REAL* p2, INT len);
-
-       /// returns the relative entropy H(P||Q), 
-       /// where p,q are given in logspace
-       static double relative_entropy(REAL* p, REAL* q, INT len);
-
-       /// returns entropy of p which is given in logspace
-       static double entropy(REAL* p, INT len);
-
-       /**@name summing functions */
-       //@{ 
-       /** sum logarithmic probabilities.
-        * Probability measures are summed up but are now given in logspace where
-        * direct summation of exp(operand) is not possible due to numerical problems, i.e. eg. exp(-1000)=0. Therefore
-        * we do log( exp(a) + exp(b)) = a + log (1 + exp (b-a)) where a is max(p,q) and b min(p,q). */
-#ifdef USE_LOGCACHE
-       static inline REAL logarithmic_sum(REAL p, REAL q)
-       {
-               if (finite(p))
-               {
-                       if (finite(q))
-                       {
-
-                               register REAL diff=p-q;
-
-                               if (diff>0)             //p>q
-                               {
-                                       if (diff > LOGRANGE)
-                                               return p;
-                                       else
-                                               return  p + logtable[(int)(diff*LOGACCURACY)];
-                               }
-                               else                    //p<=q
-                               {
-                                       if (-diff > LOGRANGE)
-                                               return  q;
-                                       else
-                                               return  q + logtable[(int)(-diff*LOGACCURACY)];
-                               }
-                       }
-                       CIO::message("INVALID second operand to logsum(%f,%f) expect undefined results\n", p, q);
-                       return NAN;
-               }
-               else 
-                       return q;
-       }
-
-       ///init log table of form log(1+exp(x))
-       static void init_log_table();
-       
-       /// determine INT x for that log(1+exp(-x)) == 0
-       static INT determine_logrange();
-
-       /// determine accuracy, such that the thing fits into MAX_LOG_TABLE_SIZE, needs logrange as argument
-       static INT determine_logaccuracy(INT range);
-#else
-       /*
-       inline REAL logarithmic_sum(REAL p, REAL q)
-       {
-           double result=comp_logarithmic_sum(p,q);
-
-           printf("diff:%f <-> %f\n",p-q, result);
-           return result;
-       }*/
-
-       static inline REAL logarithmic_sum(REAL p, REAL q)
-       {
-               if (finite(p))
-               {
-                       if (finite(q))
-                       {
-
-                               register REAL diff=p-q;
-                               if (diff>0)             //p>q
-                               {
-                                       if (diff > LOGRANGE)
-                                               return p;
-                                       else
-                                               return  p + log(1+exp(-diff));
-                               }
-                               else                    //p<=q
-                               {
-                                       if (-diff > LOGRANGE)
-                                               return  q;
-                                       else
-                                               return  q + log(1+exp(diff));
-                               }
-                       }
-                       return p;
-               }
-               else 
-                       return q;
-       }
-#endif
-#ifdef LOG_SUM_ARRAY
-       /** sum up a whole array of values in logspace.
-        * This function addresses the numeric instabiliy caused by simply summing up N elements by adding 
-        * each of the elements to some variable. Instead array neighbours are summed up until one element remains.
-        * Whilst the number of additions remains the same, the error is only in the order of log(N) instead N.
-        */
-       static inline REAL logarithmic_sum_array(REAL *p, INT len)
-         {
-           if (len<=2)
-             {
-               if (len==2)
-                 return logarithmic_sum(p[0],p[1]) ;
-               if (len==1)
-                 return p[0];
-               return -INFTY ;
-             }
-           else
-             {
-               register REAL *pp=p ;
-               if (len%2==1) pp++ ;
-               for (register INT j=0; j < len>>1; j++)
-                 pp[j]=logarithmic_sum(pp[j<<1], pp[1+(j<<1)]) ;
-             }
-           return logarithmic_sum_array(p,len%2+len>>1) ;
-         } 
-#endif
-       //@}
-public:
-       /**@name constants*/
-       //@{
-       /// infinity
-       static const REAL INFTY;
-       
-       /// almost neg (log) infinity
-       static const REAL ALMOST_NEG_INFTY;
-
-       /// range for logtable: log(1+exp(x))  -LOGRANGE <= x <= 0
-       static INT LOGRANGE;
-       
-#ifdef USE_LOGCACHE    
-       /// number of steps per integer
-       static INT LOGACCURACY;
-       //@}
-protected:
-       ///table with log-values
-       static REAL* logtable;  
-#endif
-       static CHAR rand_state[256];
-};
-
-
-//implementations of template functions
-template <class T>
-void CMath::qsort(T* output, INT size)
-{
-       if (size==2)
-       {
-               if (output[0] > output [1])
-                       swap(output[0],output[1]);
-       }
-       else
-       {
-               REAL split=output[(size*rand())/(RAND_MAX+1)];
-               //REAL split=output[size/2];
-
-               INT left=0;
-               INT right=size-1;
-
-               while (left<=right)
-               {
-                       while (output[left] < split)
-                               left++;
-                       while (output[right] > split)
-                               right--;
-
-                       if (left<=right)
-                       {
-                               swap(output[left],output[right]);
-                               left++;
-                               right--;
-                       }
-               }
-
-               if (right+1> 1)
-                       qsort(output,right+1);
-
-               if (size-left> 1)
-                       qsort(&output[left],size-left);
-       }
-}
-
-template <class T1,class T2>
-void CMath::qsort(T1* output, T2* index, INT size)
-{
-       if (size==2)
-       {
-               if (output[0] > output [1]){
-                       swap(output[0],output[1]);
-                       swap(index[0],index[1]);
-               }
-               
-       }
-       else
-       {
-               REAL split=output[(size*rand())/(RAND_MAX+1)];
-               //REAL split=output[size/2];
-               
-               INT left=0;
-               INT right=size-1;
-               
-               while (left<=right)
-               {
-                       while (output[left] < split)
-                               left++;
-                       while (output[right] > split)
-                               right--;
-                       
-                       if (left<=right)
-                       {
-                               swap(output[left],output[right]);
-                               swap(index[left],index[right]);
-                               left++;
-                               right--;
-                       }
-               }
-               
-               if (right+1> 1)
-                       qsort(output,index,right+1);
-               
-               if (size-left> 1)
-                       qsort(&output[left],&index[left], size-left);
-       }
-}
-
-template <class T1,class T2>
-void CMath::qsort_backward(T1* output, T2* index, INT size)
-{
-       if (size==2)
-       {
-               if (output[0] < output [1]){
-                       swap(output[0],output[1]);
-                       swap(index[0],index[1]);
-               }
-               
-       }
-       else
-       {
-               REAL split=output[(size*rand())/(RAND_MAX+1)];
-               //REAL split=output[size/2];
-               
-               INT left=0;
-               INT right=size-1;
-               
-               while (left<=right)
-               {
-                       while (output[left] > split)
-                               left++;
-                       while (output[right] < split)
-                               right--;
-                       
-                       if (left<=right)
-                       {
-                               swap(output[left],output[right]);
-                               swap(index[left],index[right]);
-                               left++;
-                               right--;
-                       }
-               }
-               
-               if (right+1> 1)
-                       qsort(output,index,right+1);
-               
-               if (size-left> 1)
-                       qsort(&output[left],&index[left], size-left);
-       }
-}
-
-template <class T> 
-void CMath::nmin(REAL* output, T* index, INT size, INT n)
-{
-       if (6*n*size<13*size*log((double)size))
-               for (INT i=0; i<n; i++)
-                       min(&output[i], &index[i], size-i) ;
-       else
-               qsort(output, index, size) ;
-}
-
-template <class T>
-void CMath::min(REAL* output, T* index, INT size)
-{
-       if (size<=0)
-               return ;
-       REAL min_elem = output[0] ;
-       INT min_index = 0 ;
-       for (INT i=1; i<size; i++)
-               if (output[i]<min_elem)
-               {
-                       min_index=i ;
-                       min_elem=output[i] ;
-               }
-       swap(output[0], output[min_index]) ;
-       swap(index[0], index[min_index]) ;
-}
-#endif
diff --git a/dyn_prog/QPalmaDP.i b/dyn_prog/QPalmaDP.i
deleted file mode 100644 (file)
index 4cf5864..0000000
+++ /dev/null
@@ -1,68 +0,0 @@
-%module QPalmaDP
-%{
-#define SWIG_FILE_WITH_INIT
-#include "common.h"
-#include "Mathmatics.h"
-#include "penalty_info.h"
-#include "qpalma_dp.h"
-%}
-
-/*%include "std_vector.i"
-%include "std_string.i"*/
-
-%include "carrays.i"
-
-%include "common.h"
-%include "Mathmatics.h"
-%include "penalty_info.h"
-%include "qpalma_dp.h"
-
-%array_class(int, intArray) ;
-%array_class(double, doubleArray) ;
-%array_functions(double, doubleFArray) ;
-%array_functions(struct penalty_struct, penaltyArray) ;
-
-/*
-%array_functions(int, intArray) ;
-%array_functions(double, doubleArray) ;
-*/
-%array_class(Pre_score, Pre_scoreArray) ;
-
-%pythoncode
-%{
-
-def createPenaltyArrayFromList(list):
-   array = new_penaltyArray(len(list))
-   for i in range(len(list)):
-      penaltyArray_setitem(array, i, list[i])
-   return array
-
-def createDoubleArrayFromList(list):
-   #array = new_doubleArray(len(list))
-   array = doubleArray(len(list))
-   for i in range(len(list)):
-   #   doubleArray_setitem(array, i, list[i])
-      array[i] = list[i]
-   return array
-
-def createIntArrayFromList(list):
-   #array = new_intArray(len(list))
-   array = intArray(len(list))
-   for i in range(len(list)):
-      #intArray_setitem(array, i, list[i])
-      array[i] = list[i]
-   return array
-
-def createListFromIntArray(array, array_len):
-   list = [0]*array_len
-   for i in range(array_len):
-      list[i] = intArray_getitem(array,i)
-   return list
-
-def createListFromDoubleArray(array, array_len):
-   list = [0]*array_len
-   for i in range(array_len):
-      list[i] = doubleArray_getitem(array,i)
-   return list
-
-%} 
diff --git a/dyn_prog/common.h b/dyn_prog/common.h
deleted file mode 100644 (file)
index 61776e9..0000000
+++ /dev/null
@@ -1,160 +0,0 @@
-#ifndef __COMMON_H__
-#define __COMMON_H__
-
-#include <stdlib.h> 
-#include <stdio.h> 
-#include "config.h"
-
-#ifdef SUNOS
-#define bool int
-#define false 0
-#define true 1
-#endif
-
-#ifndef LINUX
-#define RANDOM_MAX 2147483647
-#else
-#define RANDOM_MAX RAND_MAX
-#endif
-
-/**@name Standard Types 
- * Definition of Platform independent Types
-*/
-//@{
-
-/// Type CHAR
-typedef char CHAR;
-typedef CHAR* P_CHAR;
-
-/// Type BYTE 
-typedef unsigned char BYTE;
-typedef BYTE* P_BYTE;
-
-/// Type SHORT is 2 bytes in size
-typedef short int SHORT;
-typedef SHORT* P_SHORT;
-
-/// Type WORD is 2 bytes in size
-typedef unsigned short int WORD;
-typedef WORD* P_WORD;
-
-/// Type INT is 4 bytes in size
-typedef int INT;
-typedef INT* P_INT;
-
-/// Type INT is 4 bytes in size
-typedef unsigned int UINT;
-typedef UINT* P_UINT;
-
-/// Type LONG is 8 bytes in size
-typedef long LONG;
-typedef LONG* P_LONG;
-
-/// Type SHORTREAL is 4 bytes in size
-typedef float SHORTREAL;
-typedef SHORTREAL* P_SHORTREAL;
-
-/// Type REAL is 8 bytes in size
-typedef double REAL;
-typedef REAL* P_REAL;
-
-/// Type LONGREAL is 16 bytes in size
-//typedef long double LONGREAL;
-//typedef LONGREAL* P_LONGREAL;
-
-#ifdef USE_SHORTREAL_KERNELCACHE
-       typedef SHORTREAL KERNELCACHE_ELEM;
-#else
-       typedef REAL KERNELCACHE_ELEM;
-#endif
-
-typedef KERNELCACHE_ELEM P_KERNELCACHE_ELEM;
-
-typedef LONG KERNELCACHE_IDX;
-
-/// The io libs output [DEBUG] etc in front of every CIO::message
-/// 'higher' messages filter output depending on the loglevel, i.e. CRITICAL messages
-/// will print all M_CRITICAL TO M_EMERGENCY messages to
-enum EMessageType
-{
-       M_DEBUG,
-       M_INFO,
-       M_NOTICE,
-       M_WARN,
-       M_ERROR,
-       M_CRITICAL,
-       M_ALERT,
-       M_EMERGENCY,
-       M_MESSAGEONLY,
-       M_PROGRESS
-};
-
-enum EKernelType
-{
-       K_UNKNOWN = 0,
-       K_OPTIMIZABLE = 4096,
-       K_LINEAR = 10 | K_OPTIMIZABLE,
-       K_POLY  = 20,
-       K_GAUSSIAN = 30,
-       K_HISTOGRAM = 40,
-       K_SALZBERG = 41,
-       K_LOCALITYIMPROVED = 50,
-       K_SIMPLELOCALITYIMPROVED = 60,
-       K_FIXEDDEGREE = 70,
-       K_WEIGHTEDDEGREE =    80 | K_OPTIMIZABLE,
-       K_WEIGHTEDDEGREEPOS = 81 | K_OPTIMIZABLE,
-       K_WEIGHTEDDEGREEPOLYA =    82,
-       K_WD = 83,
-       K_COMMWORD = 90 | K_OPTIMIZABLE ,
-       K_POLYMATCH = 100,
-       K_ALIGNMENT = 110,
-       K_COMMWORDSTRING = 120 | K_OPTIMIZABLE,
-       K_SPARSENORMSQUARED = 130,
-       K_COMBINED = 140 | K_OPTIMIZABLE
-};
-
-enum EFeatureType
-{
-       F_UNKNOWN = 0,
-       F_REAL = 10,
-       F_SHORT = 20,
-       F_CHAR = 30,
-       F_INT = 40,
-       F_BYTE = 50,
-       F_WORD = 60
-};
-
-enum EFeatureClass
-{
-       C_UNKNOWN = 0,
-       C_SIMPLE = 10,
-       C_SPARSE = 20,
-       C_STRING = 30,
-       C_COMBINED = 40
-};
-
-/// Alphabet of charfeatures/observations
-enum E_ALPHABET
-{
-       /// DNA - letters A,C,G,T,*,N,n
-       DNA=0,
-
-       /// PROTEIN - letters a-z
-       PROTEIN=1,
-
-       /// ALPHANUM - [0-9a-z]
-       ALPHANUM=2,
-
-       /// CUBE - [1-6]
-       CUBE=3,
-
-       /// NONE - type has no alphabet
-       NONE=4
-};
-
-//@}
-
-#define TMP_DIR "/tmp/"
-//#define TMP_DIR "/short/x46/tmp/"
-
-#endif
diff --git a/dyn_prog/config.h b/dyn_prog/config.h
deleted file mode 100644 (file)
index 88b448b..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-
-#define HAVE_MATLAB 1
-
-#define HAVE_LARGEFILE 1
-
-
-
-
-
-
-#define USE_BIGSTATES 1
-
-
-#define USE_HMMCACHE 1
-
-
-
-
-
diff --git a/dyn_prog/debug_tools.cpp b/dyn_prog/debug_tools.cpp
deleted file mode 100644 (file)
index 18c7d26..0000000
+++ /dev/null
@@ -1,11 +0,0 @@
-#include <cstdio>
-#include <cstdlib>
-#include "debug_tools.h"
-using namespace std;
-
-void fassert(bool exp,int line, char* file) {
-   if (! exp) {
-      printf("invalid fassert at line %d in file %s!\n",line,file);
-      exit(EXIT_FAILURE);
-   }
-}
diff --git a/dyn_prog/debug_tools.h b/dyn_prog/debug_tools.h
deleted file mode 100644 (file)
index 4865bee..0000000
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef __DEBUG_TOOLS__
-#define __DEBUG_TOOLS__
-
-#include <cstdio>
-
-void fassert(bool exp,int line, char* file);
-
-#endif // __DEBUG_TOOLS__
-
-#define FA(expr) (fassert(expr,__LINE__,__FILE__))
-
diff --git a/dyn_prog/fill_matrix.cpp b/dyn_prog/fill_matrix.cpp
deleted file mode 100644 (file)
index 2f87968..0000000
+++ /dev/null
@@ -1,501 +0,0 @@
-/*********************************************************************************************/
-// Version of fill_matrix with fills several alignment matrices (best, second_best ...)
-// nr_paths det. number of these matrices
-//
-// est_len: Number of columns in alignment matrix: length(EST) +1 (for the gap) (variable: i)
-// dna_len: Number of lines in alignment matrix: length(DNA) +1 (for the gap) (variable: j)
-//
-// !!! IMPORTANT !!! The pictures and equations in the paper/poster etc. describe the algorithm
-// for an alignment matrix where DNA and EST changed places !!! IMPORTANT !!!
-/*********************************************************************************************/
-
-/*
-  matchmatrix: columnwise
-    - A C G T N (dna)
-  - x x x x x x
-  A x x x x x x        
-  C x x x x x x
-  G x x x x x x
-  T x x x x x x
-  N x x x x x x
-(est)
-*/
-
-/*
-  alignment matrix: columnwise
-   |-EST . . .
-  -+------------
-  -|00 ...
-  D|0.
-  N|. .
-  A|.  .
-  .|.
-  .|
-  .|
-*/
-
-
-/* TODO
-   - max_len = 10000 for elegans!
-   - do not use double
-*/
-
-#include "fill_matrix.h"
-#include "debug_tools.h"
-
-#define D_USE_QUALITY_SCORES 1
-
-int check_char(char base)
-{
-  switch(base)
-  {
-  case 'A':
-  case 'a': return 1;
-  case 'C':    
-  case 'c': return 2;
-  case 'G':
-  case 'g': return 3;
-  case 'T':
-  case 't': return 4;
-  case 'N':
-  case 'n': return 5; 
-  //default : return 0;
-  default : return -1;
-  }
-  
-}
-
- /* system of Palma. Otherwise scoring_functions should point to an array of
- * plifs scoring the respective pairs of characters together with the EST
- * quality score.
- */
-
-inline double getScore(struct penalty_struct* qualityScores, int mlen, int dnaChar, int estChar, double baseScore) {
-   double score = 0.0;
-   FA(estChar > 0 && estChar < 6);
-   FA(dnaChar > -1 && dnaChar < 6);
-
-   //printf("Starting scoring / scheme is USE_QUALITY_SCORES\n");
-   //printf("estChar,dnaChar are: %d,%d,%d",estChar,dnaChar);
-   
-   int currentPos = (estChar-1)*6+dnaChar;
-   struct penalty_struct currentPen = qualityScores[currentPos];
-
-   //if( estChar == 1 and dnaChar == 1)
-   //   printf("A-A : %f %f\n",currentPen.penalties[0],currentPen.penalties[1]);
-
-       INT idx = 0 ;
-       for (INT i=0; i<currentPen.len; i++)
-               if (currentPen.limits[i] <= baseScore)
-                       idx++ ;
-       
-       if (idx==0)
-               score=currentPen.penalties[0] ;
-       else if (idx==currentPen.len)
-               score=currentPen.penalties[currentPen.len-1] ;
-       else
-       {
-               score = (currentPen.penalties[idx]*(baseScore-currentPen.limits[idx-1]) + currentPen.penalties[idx-1]*
-                          (currentPen.limits[idx]-baseScore)) / (currentPen.limits[idx]-currentPen.limits[idx-1]) ;  
-   }
-
-   //score = lookup_penalty(&currentPen,idx, NULL, 0);
-   //if(baseScore <= currentPen.limits[0]) {
-   //   score = currentPen.penalties[0];
-   //} else {
-   //   int sp_idx;
-   //   for(sp_idx=0;sp_idx<currentPen.len;sp_idx++) {
-   //      if(currentPen.limits[sp_idx] <= baseScore )
-   //         score = currentPen.penalties[sp_idx];
-   //   //printf("Current score is %f\n",score);
-   //   }
-   //}
-
-   return score;
-}
-
-void fill_matrix(int nr_paths, Pre_score* matrices[], int est_len /*counter var: i*/, int dna_len /*counter var: j*/, char* est, char* dna, double* prb, penalty_struct* functions , double* matchmatrix, penalty_struct* qualityScores, double* donor, double* acceptor, bool remove_duplicate_scores, int nr_donor_sites, int* donor_sites, int* max_score_positions)
-{      
-  //printf("Entering fill_matrix...\n");
-
-  /*********************************************************************************************/
-  // variables
-  /*********************************************************************************************/
-  const int mlen = 6; // length of matchmatrix
-  int estChar, dnaChar; // 0:'-', 1:'a', 2:'c', 3:'g', 4:'t', 5:'n'
-
-  double baseScore;
-  double *est_scores = prb ;
-
-  double prevValue ;
-  double tempValue;
-  int max_len = (int)functions->limits[functions->len-1] ; //last (len) supporting point of h
-  //printf("max_len: %d\n", max_len) ;
-
-  int num_elem ; // counter variable
-
-  const int max_num_elem = (4+nr_paths+max_len) * nr_paths ; // upper bound on num_elem
-  double *scores=new double[max_num_elem] ; //are sorted later
-  struct ArrayElem *array=new ArrayElem[max_num_elem] ;
-  //printf("%d\n", max_num_elem*sizeof(ArrayElem)) ;
-  
-  int max_ss_element[nr_paths*nr_paths]; //position on dna with max ss score for introns of length greater than max_len
-  int splice_pos ; //splice position get from donor_sites
-  int start_splice_pos ; //where to start looking in donor_sites
-
-  double max_scores[nr_paths] ;
-
-  /*********************************************************************************************/
-  // Initialisation of all alignment matrices, columnwise
-  /*********************************************************************************************/
-  for (int z=0; z<nr_paths; z++) /* matrice z */
-  {
-    max_scores[z] = log(0.0) ;
-    max_score_positions[2*z]    = 0; // pos i
-    max_score_positions[2*z +1] = 0; // pos j
-
-    for (int i=0; i<est_len; i++) /* i.column */
-    {
-      for (int j=0; j<dna_len; j++) /* j.line */
-      {
-       ((Pre_score*)matrices[z] +(i*dna_len) +j)->value = log(0.0); // -inf
-       ((Pre_score*)matrices[z] +(i*dna_len) +j)->prev_i = 0;
-       ((Pre_score*)matrices[z] +(i*dna_len) +j)->prev_j = 0;
-       ((Pre_score*)matrices[z] +(i*dna_len) +j)->prev_matrix_no = 0;
-       ((Pre_score*)matrices[z] +(i*dna_len) +j)->issplice = 0;
-      }
-    }
-  }
-
-  /*********************************************************************************************/      
-  // Element (0,0) for first/best matrice (z=0)
-  /*********************************************************************************************/       
-  ((Pre_score*)matrices[0])->value = 0;
-  ((Pre_score*)matrices[0])->prev_i = 0;
-  ((Pre_score*)matrices[0])->prev_j = 0;
-  ((Pre_score*)matrices[0])->prev_matrix_no = 0;
-  ((Pre_score*)matrices[0])->issplice = 0;
-
-  /*********************************************************************************************/      
-  // Line Zero for first/best matrice (z=0)
-  /*********************************************************************************************/
-  for (int i=1; i<est_len; i++) /* 0.line, all columns (DNA='-') */
-  {    
-    ((Pre_score*)matrices[0] +(i*dna_len))->value = 0 ;                
-    ((Pre_score*)matrices[0] +(i*dna_len))->prev_i = i-1;
-    ((Pre_score*)matrices[0] +(i*dna_len))->prev_j = 0;    
-    ((Pre_score*)matrices[0] +(i*dna_len))->prev_matrix_no = 0;
-    ((Pre_score*)matrices[0] +(i*dna_len))->issplice = 0;
-  }
-
-
-  /*********************************************************************************************/      
-  // Column Zero for first/best matrice (z=0)
-  /*********************************************************************************************/      
-  for (int j=1; j<dna_len; j++) // 0.column, all lines
-  {      
-    ((Pre_score*)matrices[0] +j)->value =  0 ;
-    ((Pre_score*)matrices[0] +j)->prev_i = 0 ;
-    ((Pre_score*)matrices[0] +j)->prev_j = j-1 ;
-    ((Pre_score*)matrices[0] +j)->prev_matrix_no = 0 ;
-    ((Pre_score*)matrices[0] +j)->issplice = 0;
-  }
-
-   
-  /*********************************************************************************************/      
-  // !!!! Column Zero and line Zero for all other matrices except the best one (z>0): -INF !!!! 
-  /*********************************************************************************************/      
-
-
-   //printf("after initialization\n");
-  
-  /*********************************************************************************************/
-  // all other entries for all matrices, last column and row treated seperately
-  /*********************************************************************************************/
-  
-  /* columnwise, 0.column and 0.line already filled */
-  for (int i=1; i<est_len; i++) /* i.column */
-  {
-    
-    start_splice_pos = 0 ; //where to start looking in donor_sites
-    for (int zz=0; zz<nr_paths*nr_paths; zz++) {max_ss_element[zz]=0 ;} // initializing
-  
-    for (int j=1; j<dna_len; j++) //j.line
-    {
-      dnaChar = check_char(dna[j-1]) ; // -1 because of of the gaps in the 0.line/column of the al-matrix
-      estChar = check_char(est[i-1]) ;
-      baseScore = est_scores[i-1];
-
-      //printf("at iteration %d %d\n",i,j);
-
-      //if introns of length greater than max_len possible
-      if (j > max_len)
-      {
-       //storing highest ss_score (the position) and NOT -INF
-       int new_ss_pos = j-max_len ;
-       double donor_score = donor[new_ss_pos-1] ;
-       if (isfinite(donor_score)) //splice site!
-        {
-         if (max_ss_element[nr_paths-1] != 0)
-          { 
-           for (int zz=0; zz<nr_paths; zz++) //for each alignment matrix
-            {
-             int pos_with_minscore = nr_paths ; //assuming, the new score is the worst score
-             double new_intron_score = donor_score+((Pre_score*)matrices[zz]+(i*dna_len)+new_ss_pos-1)->value ; //could be -INF
-             if (!(isfinite(new_intron_score))) {printf("new_intron_score is -INF\n") ;}
-             for (int pos=0; pos<nr_paths; pos++) //find worst score of all nr_paths+1 scores
-              {
-               int act_splice_position = max_ss_element[zz*nr_paths+pos] ;
-               assert(isfinite(donor[act_splice_position-1])) ;
-               double act_intron_score = donor[act_splice_position-1]+((Pre_score*)matrices[zz]+(i*dna_len)+act_splice_position-1)->value ;
-               if (new_intron_score >= act_intron_score) 
-                {
-                 new_intron_score = act_intron_score ;
-                 pos_with_minscore = pos ;
-               } //else nothing happens, new intron score still the worst one
-             }
-             if (pos_with_minscore != nr_paths)
-              {
-               max_ss_element[zz*nr_paths+pos_with_minscore] = new_ss_pos ; //replacing
-             }
-           }
-         }
-         else
-          {
-           for (int pos=0; pos<nr_paths; pos++)
-            {
-             if (max_ss_element[pos]==0)  //no previous splice site at this pos
-              {
-               for (int zz=0; zz<nr_paths; zz++) {max_ss_element[zz*nr_paths+pos]=j-max_len ;}
-               break ;
-             }
-           } 
-         }
-       }
-       //start_pos in donor_sites so that only introns shorter then max_length are seen
-       if ((start_splice_pos < nr_donor_sites) && donor_sites[start_splice_pos] == j-max_len)
-        {
-         start_splice_pos++ ;
-       }
-      }
-      
-
-      /* Computing all possible scores */
-      num_elem = 0 ; /* number of different scores/possibilities so far */
-      for (int z=0; z<nr_paths; z++) /* matrice z */
-      {
-       Pre_score* actMatrix = (Pre_score*)matrices[z]; /* hm, why?  */
-
-       /***************************************/
-       // 0. Zero (local alignment version)
-        /***************************************/
-       assert(num_elem<max_num_elem) ;
-       array[num_elem].prev_i = 0; 
-       array[num_elem].prev_j = 0; 
-       array[num_elem].prev_matrix_no = z;
-       array[num_elem].isSplice = false; /* no splice site */
-       scores[num_elem] = 0 ;
-       num_elem++ ;
-
-
-        /***************************************/
-       // 1. gap on EST (j-1,i) -> (j,i) 
-        /***************************************/
-       prevValue = ((Pre_score*)actMatrix +(i*dna_len)+j-1)->value ; /* (j-1,i) -> (j,i) */
-       if (isfinite(prevValue))
-       {
-      //if( currentMode == USE_QUALITY_SCORES ) {
-#if D_USE_QUALITY_SCORES 
-         tempValue = prevValue + matchmatrix[dnaChar];
-#else
-      //} else {
-             tempValue = prevValue + matchmatrix[mlen*dnaChar];   /* score(gap,DNA) */
-      //}
-#endif
-
-     //printf("tempValue: %f\n",tempValue);
-
-         if (isfinite(tempValue))
-         {
-           assert(num_elem<max_num_elem) ;
-           array[num_elem].prev_i = i; 
-           array[num_elem].prev_j = j-1; /* predecessor */
-           array[num_elem].prev_matrix_no = z;
-           array[num_elem].isSplice = false; /* no splice site */
-           scores[num_elem] = -tempValue ;
-           num_elem++ ;
-         }
-       }
-      
-
-       /***************************************/
-       // 2. match/mismatch (j-1,i-1) -> (j,i)
-       /***************************************/
-       prevValue = ((Pre_score*)actMatrix +(i-1)*dna_len + j-1)->value ; /* (j-1,i-1) -> (j,i) */
-       if (isfinite(prevValue))
-       {
-      //if( currentMode == USE_QUALITY_SCORES ) {
-#if D_USE_QUALITY_SCORES 
-        tempValue = prevValue + getScore(qualityScores,mlen,dnaChar,estChar,baseScore);
-#else
-      //} else {
-        tempValue = prevValue +(matchmatrix[mlen* dnaChar +estChar]);
-#endif
-     //}
-
-     //printf("tempValue: %f\n",tempValue);
-
-         if (isfinite(tempValue))
-         {
-           assert(num_elem<max_num_elem) ;
-           array[num_elem].prev_i = i-1; /* predecessor */
-           array[num_elem].prev_j = j-1; /* predecessor */
-           array[num_elem].prev_matrix_no = z;
-           array[num_elem].isSplice = false; /* no splice site */
-           scores[num_elem] = -tempValue ;
-           num_elem++ ;
-         }
-       }
-       
-
-       /***************************************/
-       // 3. gap on DNA (j,i-1) -> (j,i)
-       /***************************************/
-       prevValue = ((Pre_score*)actMatrix +(i-1)*dna_len + j)->value ;
-       if (isfinite(prevValue))
-       {
-      //if( currentMode == USE_QUALITY_SCORES ) {
-#if D_USE_QUALITY_SCORES 
-        tempValue = prevValue + getScore(qualityScores,mlen,0,estChar,baseScore);
-#else
-//      } else {
-            tempValue = prevValue +(matchmatrix[estChar]); /* score(EST,gap) */
-//      }
-#endif
-
-     //printf("tempValue: %f\n",tempValue);
-
-         if (isfinite(tempValue))
-         {
-           assert(num_elem<max_num_elem) ;
-           array[num_elem].prev_i = i-1; /* predecessor */
-           array[num_elem].prev_j = j; 
-           array[num_elem].prev_matrix_no = z;
-           array[num_elem].isSplice = false;  /* no splice site */
-           scores[num_elem]=-tempValue ;
-           num_elem++ ;
-         }
-       }
-
-       /***************************************/
-       // 4. all possible splice sites/introns
-       /***************************************/
-       // acceptor[j-1]: g of ag (-1 because cpp starts counting at 0)
-       // donor[splice_pos-1]: g of gt/gc (-1 because cpp starts counting at 0)
-       // donor_sites: the g's of the gt/gc
-       
-       if (isfinite(acceptor[j-1]))
-       {
-         
-         //the nr_paths many introns of length greater than max_len
-         for (int pos=0; pos<nr_paths; pos++)
-          {
-           if (max_ss_element[z*nr_paths+pos]>0)
-            {
-             assert((j-max_ss_element[z*nr_paths+pos]+1) > max_len) ;
-             splice_pos = max_ss_element[z*nr_paths+pos] ; //the g of the gt 
-             prevValue = ((Pre_score*)actMatrix + (i*dna_len) +splice_pos-1)->value ; //one before the _gt
-             if (isfinite(prevValue) && (((Pre_score*)actMatrix + (i*dna_len) +splice_pos-1)->issplice!=1))
-              {
-               int dist = j -(splice_pos-1); //_gt...ag: dist from g to g
-               assert(dist>max_len) ;
-               tempValue = lookup_penalty(functions,dist, NULL, 0) +donor[splice_pos-1] +acceptor[j-1] +prevValue;
-
-       //printf("tempValue: %f\n",tempValue);
-
-               if (isfinite(tempValue))
-                {
-                 assert(num_elem<max_num_elem) ;
-                 array[num_elem].prev_i = i; 
-                 array[num_elem].prev_j = splice_pos-1; // predecessor
-                 array[num_elem].prev_matrix_no = z;
-                 array[num_elem].isSplice = true; // splice site
-                 scores[num_elem]=-tempValue ;
-                 num_elem++ ;
-               }
-               else if (isfinite(lookup_penalty(functions,dist, NULL, 0)))
-                {
-                 printf("Something wrong with donor_sites(1000er) (intron_length: %d)\n", dist) ;
-                 sleep(5000) ;
-               }
-             }
-           }
-         }
-
-          //all introns of length <= max_len
-          for (int k=start_splice_pos; k < nr_donor_sites; k++) 
-         //for (int k=0; k < nr_donor_sites; k++) //also intron of length greater than 1000
-         {   
-           splice_pos = donor_sites[k] ;
-           if (splice_pos > j-4) { break ;}
-           prevValue = ((Pre_score*)actMatrix + (i*dna_len) +splice_pos-1)->value ; //one before the _gt
-           
-           if (isfinite(prevValue) && (((Pre_score*)actMatrix + (i*dna_len) +splice_pos-1)->issplice!=1))
-           {
-             int dist = j -(splice_pos-1); //_gt...ag: dist from g to g
-             assert(dist<=max_len) ;
-             tempValue = lookup_penalty(functions,dist, NULL, 0) +donor[splice_pos-1] +acceptor[j-1] +prevValue;
-             if (isfinite(tempValue))
-             {
-               assert(num_elem<max_num_elem) ;
-               array[num_elem].prev_i = i; 
-               array[num_elem].prev_j = splice_pos-1; /* predecessor */
-               array[num_elem].prev_matrix_no = z;
-               array[num_elem].isSplice = true; /* splice site */
-               scores[num_elem]=-tempValue ;
-               num_elem++ ;
-             }
-             else if (isfinite(lookup_penalty(functions,dist, NULL, 0)))
-             {
-               printf("Something wrong with donor_sites (intron_length: %d)\n", dist) ;
-               sleep(5000) ;
-             }
-           }
-         }
-       }
-
-      } //end of z
-      
-      CMath::nmin<struct ArrayElem>(scores, array, num_elem, nr_paths);
-      
-      /* the nr_paths many highest scores are written in the nr_paths many matrices*/
-      for (int z=0; z<nr_paths; z++) {
-       if (z>=num_elem) {
-         //nothing happens, entry in matrices[z] at pos. (i,j) still -inf
-       }
-       else {
-         ((Pre_score*)matrices[z] + (i*dna_len) +j)->value = -scores[z] ;
-         ((Pre_score*)matrices[z] + (i*dna_len) +j)->prev_i = array[z].prev_i ;
-         ((Pre_score*)matrices[z] + (i*dna_len) +j)->prev_j = array[z].prev_j ;
-         ((Pre_score*)matrices[z] + (i*dna_len) +j)->issplice = array[z].isSplice ;
-         ((Pre_score*)matrices[z] + (i*dna_len) +j)->prev_matrix_no = array[z].prev_matrix_no;
-
-         if (-scores[z]>=max_scores[z]) { //storing the postion of the highest alignment score yet
-           max_scores[z] = -scores[z] ;
-           max_score_positions[2*z]    = i; // pos i
-           max_score_positions[2*z +1] = j; // pos j
-         }
-       }
-      } //end of z
-
-    } //end of j
-  } // end of i
-
-  //free memory
-  delete[] array;
-  delete[] scores;
-
-  //printf("Leaving fill_matrix...\n");
-  return;
-}
diff --git a/dyn_prog/fill_matrix.h b/dyn_prog/fill_matrix.h
deleted file mode 100644 (file)
index 9432c4e..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-#ifndef _FILLMATRIX_H__
-#define _FILLMATRIX_H__
-
-#include "Mathmatics.h"
-#include "penalty_info.h"
-#include <algorithm> //Eigentlich in Header
-#include <assert.h>
-#include <ctype.h>
-#include <iostream>
-#include <math.h>
-#include <stdlib.h> 
-#include <string.h>
-
-
-struct ArrayElem { //24B
-  double score;
-  int prev_j;
-  int prev_i;
-  int prev_matrix_no;   
-  bool isSplice;
-} ;
-
-struct pre_score { //24B
-  double value; //8
-  int prev_i; //4
-  int prev_j; //4
-  int prev_matrix_no;//4
-  bool issplice;//4
-};
-
-typedef struct pre_score Pre_score;
-
-typedef struct Align_pair { //24B
-  double value ; 
-  bool issplice;
-  int previ,prevj ;
-  char est_char;
-  char dna_char;
-};
-
-
-void fill_matrix (int nr_paths, Pre_score* matrices[], int len_est, int len_dna, char* est, char* dna, double* prb, penalty_struct* functions, double* matchmatrix, penalty_struct* qualityScores, double* donor, double* acceptor, bool remove_duplicate_scores, int nr_donor_sites, int* donor_sites, int* max_score_positions);
-// funktion aendern in void result_align(Pre_score* matrices[], int anzpath,
-
-int check_char(char base);
-
-bool result_align (Pre_score* matrices[], int matrixnr, int i,  int j, int* result_length_ptr, char* est, char* dna, int* s_align, int* e_align, int* m_param, double* alignmentscores, int* max_score_positions );
-
-extern void print_align(Pre_score* matrix, int length_est,  int length_dna, Align_pair* vektor, int result_length, int print_matrix);
-
-double getScore(double *matchmatrix,int mlen, int dnaChar, int estChar);
-
-
-double scoreMatch(int dnaChar, int estChar, double baseScore);
-
-
-#endif // _FILLMATRIX_H__
diff --git a/dyn_prog/io.cpp b/dyn_prog/io.cpp
deleted file mode 100644 (file)
index 5ae3e1c..0000000
+++ /dev/null
@@ -1,96 +0,0 @@
-#include "io.h"
-#include "common.h"
-
-#include <stdio.h>
-#include <stdarg.h>
-#include <ctype.h>
-
-FILE* CIO::target=stdout;
-
-CIO::CIO()
-{
-}
-
-void CIO::message(EMessageType prio, const CHAR *fmt, ... )
-{
-       check_target();
-       print_message_prio(prio, target);
-    va_list list;
-    va_start(list,fmt);
-    vfprintf(target,fmt,list);
-    va_end(list);
-    fflush(target);
-}
-
-void CIO::buffered_message(EMessageType prio, const CHAR *fmt, ... )
-{
-       check_target();
-       print_message_prio(prio, target);
-    va_list list;
-    va_start(list,fmt);
-    vfprintf(target,fmt,list);
-    va_end(list);
-}
-
-CHAR* CIO::skip_spaces(CHAR* str)
-{
-       INT i=0;
-
-       if (str)
-       {
-               for (i=0; isspace(str[i]); i++);
-
-               return &str[i];
-       }
-       else 
-               return str;
-}
-
-void CIO::set_target(FILE* t)
-{
-       target=t;
-}
-
-void CIO::check_target()
-{
-       if (!target)
-               target=stdout;
-}
-
-void CIO::print_message_prio(EMessageType prio, FILE* target)
-{
-       switch (prio)
-       {
-               case M_DEBUG:
-                       fprintf(target, "[DEBUG] ");
-                       break;
-               case M_PROGRESS:
-                       fprintf(target, "[PROGRESS] ");
-                       break;
-               case M_INFO:
-                       //fprintf(target, "[INFO] ");
-                       break;
-               case M_NOTICE:
-                       fprintf(target, "[NOTICE] ");
-                       break;
-               case M_WARN:
-                       fprintf(target, "[WARN] ");
-                       break;
-               case M_ERROR:
-                       fprintf(target, "[ERROR] ");
-                       break;
-               case M_CRITICAL:
-                       fprintf(target, "[CRITICAL] ");
-                       break;
-               case M_ALERT:
-                       fprintf(target, "[ALERT] ");
-                       break;
-               case M_EMERGENCY:
-                       fprintf(target, "[EMERGENCY] ");
-                       break;
-               case M_MESSAGEONLY:
-                       break;
-               default:
-                       break;
-       }
-}
diff --git a/dyn_prog/io.h b/dyn_prog/io.h
deleted file mode 100644 (file)
index c127f92..0000000
+++ /dev/null
@@ -1,33 +0,0 @@
-#ifndef __CIO_H__
-#define __CIO_H__
-
-#include "common.h"
-
-#include <stdio.h>
-#include <stdarg.h>
-
-class CIO
-{
-public:
-       CIO();
-
-       static void set_target(FILE* target);
-       static void message(EMessageType prio, const CHAR *fmt, ... );
-
-       inline static void not_implemented() 
-       {
-               message(M_ERROR, "Sorry, not yet implemented\n");
-       }
-
-       static void buffered_message(EMessageType prio, const CHAR *fmt, ... );
-
-       static CHAR* skip_spaces(CHAR* str);
-
-protected:
-       static void check_target();
-       static void print_message_prio(EMessageType prio, FILE* target);
-
-protected:
-       static FILE* target;
-};
-#endif
diff --git a/dyn_prog/penalty_info.cpp b/dyn_prog/penalty_info.cpp
deleted file mode 100644 (file)
index d5d739f..0000000
+++ /dev/null
@@ -1,368 +0,0 @@
-#include <assert.h>
-#include "config.h"
-//#include "features/CharFeatures.h"
-//#include "features/StringFeatures.h"
-
-#include <stdio.h>
-#include <string.h>
-
-#include "io.h"
-
-
-#include "fill_matrix.h"
-#include "penalty_info.h"
-
-void init_penalty_struct(struct penalty_struct &PEN)
-{
-       PEN.limits=NULL ;
-       PEN.penalties=NULL ;
-       PEN.id=-1 ;
-       PEN.next_pen=NULL ;
-       PEN.transform = T_LINEAR ;
-       PEN.name = NULL ;
-       PEN.max_len=0 ;
-       PEN.min_len=0 ;
-       PEN.cache=NULL ;
-       PEN.use_svm=0 ;
-}
-
-void init_penalty_struct_cache(struct penalty_struct &PEN)
-{
-       if (PEN.cache || PEN.use_svm)
-               return ;
-               
-       REAL* cache=new REAL[PEN.max_len+1] ;
-       if (cache)
-       {
-               for (INT i=0; i<=PEN.max_len; i++)
-                       cache[i] = lookup_penalty(&PEN, i, 0, false) ;
-               PEN.cache = cache ;
-       }
-}
-
-void delete_penalty_struct_palma(struct penalty_struct &PEN)
-{
-       if (PEN.id!=-1)
-       {
-               delete[] PEN.limits ;
-               delete[] PEN.penalties ;
-               delete[] PEN.name ;
-               delete[] PEN.cache ;
-       }
-}
-
-void delete_penalty_struct_array(struct penalty_struct *PEN, INT len)
-{
-       for (int i=0; i<len; i++)
-               delete_penalty_struct_palma(PEN[i]) ;
-       delete[] PEN ;
-}
-
-void copy_penalty_struct(struct penalty_struct *old, struct penalty_struct *newp) {
-   newp->len       = old->len;
-   newp->limits    = old->limits;
-   newp->penalties = old->penalties;
-   newp->max_len   = old->max_len;
-   newp->min_len   = old->min_len;
-   newp->cache     = old->cache;
-   newp->transform = old->transform;
-   newp->id        = old->id;
-   newp->next_pen  = old->next_pen;
-   newp->name      = old->name;
-   newp->use_svm   = old->use_svm;
-}
-
-
-//struct penalty_struct * read_penalty_struct_from_cell(const mxArray * mx_penalty_info, int &P)
-//{
-//     P = mxGetN(mx_penalty_info) ;
-//     
-//     struct penalty_struct * PEN = new struct penalty_struct[P] ;
-//     for (INT i=0; i<P; i++)
-//              init_penalty_struct(PEN[i]) ;
-//     
-//     for (INT i=0; i<P; i++)
-//     {
-//       const mxArray* mx_elem = mxGetCell(mx_penalty_info, i) ;
-//       if (mx_elem==NULL || !mxIsStruct(mx_elem))
-//         {
-//           CIO::message(M_ERROR, "empty cell element\n") ;
-//           delete_penalty_struct_array(PEN,P) ;
-//           return NULL ;
-//         } ;
-//       const mxArray* mx_id_field = mxGetField(mx_elem, 0, "id") ;
-//       if (mx_id_field==NULL || !mxIsNumeric(mx_id_field) || 
-//           mxGetN(mx_id_field)!=1 || mxGetM(mx_id_field)!=1)
-//         {
-//           CIO::message(M_ERROR, "missing id field\n") ;
-//           delete_penalty_struct_array(PEN,P) ;
-//           return NULL ;
-//         }
-//       const mxArray* mx_limits_field = mxGetField(mx_elem, 0, "limits") ;
-//       if (mx_limits_field==NULL || !mxIsNumeric(mx_limits_field) ||
-//           mxGetM(mx_limits_field)!=1)
-//         {
-//           CIO::message(M_ERROR, "missing limits field\n") ;
-//           delete_penalty_struct_array(PEN,P) ;
-//           return NULL ;
-//         }
-//       INT len = mxGetN(mx_limits_field) ;
-//       
-//       const mxArray* mx_penalties_field = mxGetField(mx_elem, 0, "penalties") ;
-//       if (mx_penalties_field==NULL || !mxIsNumeric(mx_penalties_field) ||
-//           mxGetM(mx_penalties_field)!=1 || mxGetN(mx_penalties_field)!=len)
-//         {
-//           CIO::message(M_ERROR, "missing penalties field\n") ;
-//           delete_penalty_struct_array(PEN,P) ;
-//           return NULL ;
-//         }
-//       const mxArray* mx_transform_field = mxGetField(mx_elem, 0, "transform") ;
-//       if (mx_transform_field==NULL || !mxIsChar(mx_transform_field))
-//         {
-//           CIO::message(M_ERROR, "missing transform field\n") ;
-//           delete_penalty_struct_array(PEN,P) ;
-//           return NULL ;
-//         }
-//       const mxArray* mx_name_field = mxGetField(mx_elem, 0, "name") ;
-//       if (mx_name_field==NULL || !mxIsChar(mx_name_field))
-//         {
-//           CIO::message(M_ERROR, "missing name field\n") ;
-//           delete_penalty_struct_array(PEN,P) ;
-//           return NULL ;
-//         }
-//       const mxArray* mx_max_len_field = mxGetField(mx_elem, 0, "max_len") ;
-//       if (mx_max_len_field==NULL || !mxIsNumeric(mx_max_len_field) ||
-//           mxGetM(mx_max_len_field)!=1 || mxGetN(mx_max_len_field)!=1)
-//         {
-//           CIO::message(M_ERROR, "missing max_len field\n") ;
-//           delete_penalty_struct_array(PEN,P) ;
-//           return NULL ;
-//         }
-//       const mxArray* mx_min_len_field = mxGetField(mx_elem, 0, "min_len") ;
-//       if (mx_min_len_field==NULL || !mxIsNumeric(mx_min_len_field) ||
-//           mxGetM(mx_min_len_field)!=1 || mxGetN(mx_min_len_field)!=1)
-//         {
-//           CIO::message(M_ERROR, "missing min_len field\n") ;
-//           delete_penalty_struct_array(PEN,P) ;
-//           return NULL ;
-//         }
-//       const mxArray* mx_use_svm_field = mxGetField(mx_elem, 0, "use_svm") ;
-//       if (mx_use_svm_field==NULL || !mxIsNumeric(mx_use_svm_field) ||
-//           mxGetM(mx_use_svm_field)!=1 || mxGetN(mx_use_svm_field)!=1)
-//         {
-//           CIO::message(M_ERROR, "missing use_svm field\n") ;
-//           delete_penalty_struct_array(PEN,P) ;
-//           return NULL ;
-//         }
-//       INT use_svm = (INT) mxGetScalar(mx_use_svm_field) ;
-//             //fprintf(stderr, "use_svm_field=%i\n", use_svm) ;
-//       
-//       const mxArray* mx_next_id_field = mxGetField(mx_elem, 0, "next_id") ;
-//       if (mx_next_id_field==NULL || !mxIsNumeric(mx_next_id_field) ||
-//           mxGetM(mx_next_id_field)!=1 || mxGetN(mx_next_id_field)!=1)
-//         {
-//           CIO::message(M_ERROR, "missing next_id field\n") ;
-//           delete_penalty_struct_array(PEN,P) ;
-//           return NULL ;
-//         }
-//       INT next_id = (INT) mxGetScalar(mx_next_id_field)-1 ;
-//       
-//       INT id = (INT) mxGetScalar(mx_id_field)-1 ;
-//       if (i<0 || i>P-1)
-//         {
-//           CIO::message(M_ERROR, "id out of range\n") ;
-//           delete_penalty_struct_array(PEN,P) ;
-//           return NULL ;
-//         }
-//       INT max_len = (INT) mxGetScalar(mx_max_len_field) ;
-//       if (max_len<0 || max_len>1024*1024*100)
-//         {
-//           CIO::message(M_ERROR, "max_len out of range\n") ;
-//                     delete_penalty_struct_array(PEN,P) ;
-//                     return NULL ;
-//         }
-//       PEN[id].max_len = max_len ;
-//
-//       INT min_len = (INT) mxGetScalar(mx_min_len_field) ;
-//       if (min_len<0 || min_len>1024*1024*100)
-//         {
-//           CIO::message(M_ERROR, "min_len out of range\n") ;
-//                     delete_penalty_struct_array(PEN,P) ;
-//                     return NULL ;
-//         }
-//       PEN[id].min_len = min_len ;
-//       
-//       if (PEN[id].id!=-1)
-//         {
-//           CIO::message(M_ERROR, "penalty id already used\n") ;
-//           delete_penalty_struct_array(PEN,P) ;
-//           return NULL ;
-//         }
-//       PEN[id].id=id ;
-//       if (next_id>=0)
-//         PEN[id].next_pen=&PEN[next_id] ;
-//       //fprintf(stderr,"id=%i, next_id=%i\n", id, next_id) ;
-//       
-//       assert(next_id!=id) ;
-//       PEN[id].use_svm=use_svm ;
-//       PEN[id].limits = new REAL[len] ;
-//       PEN[id].penalties = new REAL[len] ;
-//       double * limits = mxGetPr(mx_limits_field) ;
-//       double * penalties = mxGetPr(mx_penalties_field) ;
-//       
-//       for (INT i=0; i<len; i++)
-//         {
-//           PEN[id].limits[i]=limits[i] ;
-//           PEN[id].penalties[i]=penalties[i] ;
-//         }
-//       PEN[id].len = len ;
-//       
-//       char *transform_str = mxArrayToString(mx_transform_field) ;                           
-//       char *name_str = mxArrayToString(mx_name_field) ;                             
-//       
-//       if (strcmp(transform_str, "log")==0)
-//         PEN[id].transform = T_LOG ;
-//       else if (strcmp(transform_str, "log(+1)")==0)
-//         PEN[id].transform = T_LOG_PLUS1 ;   
-//       else if (strcmp(transform_str, "log(+3)")==0)
-//         PEN[id].transform = T_LOG_PLUS3 ;   
-//       else if (strcmp(transform_str, "(+3)")==0)
-//         PEN[id].transform = T_LINEAR_PLUS3 ;        
-//       else if (strcmp(transform_str, "")==0)
-//         PEN[id].transform = T_LINEAR ;      
-//       else
-//         {
-//           delete_penalty_struct_array(PEN,P) ;
-//           mxFree(transform_str) ;
-//           return NULL ;
-//         }
-//       PEN[id].name = new char[strlen(name_str)+1] ;
-//       strcpy(PEN[id].name, name_str) ;
-//       
-//       init_penalty_struct_cache(PEN[id]) ;
-//         
-//       mxFree(transform_str) ;
-//       mxFree(name_str) ;
-//     }
-//     return PEN ;
-//}
-
-REAL lookup_penalty_svm(const struct penalty_struct *PEN, INT p_value, REAL *d_values)
-{      
-       if (PEN==NULL)
-               return 0 ;
-       assert(PEN->use_svm>0) ;
-       REAL d_value=d_values[PEN->use_svm-1] ;
-       //fprintf(stderr,"transform=%i, d_value=%1.2f\n", (INT)PEN->transform, d_value) ;
-       
-       switch (PEN->transform)
-       {
-       case T_LINEAR:
-               break ;
-       case T_LOG:
-               d_value = log(d_value) ;
-               break ;
-       case T_LOG_PLUS1:
-               d_value = log(d_value+1) ;
-               break ;
-       case T_LOG_PLUS3:
-               d_value = log(d_value+3) ;
-               break ;
-       case T_LINEAR_PLUS3:
-               d_value = d_value+3 ;
-               break ;
-       default:
-               CIO::message(M_ERROR, "unknown transform\n") ;
-               break ;
-       }
-       
-       INT idx = 0 ;
-       REAL ret ;
-       for (INT i=0; i<PEN->len; i++)
-               if (PEN->limits[i]<=d_value)
-                       idx++ ;
-       
-       if (idx==0)
-               ret=PEN->penalties[0] ;
-       else if (idx==PEN->len)
-               ret=PEN->penalties[PEN->len-1] ;
-       else
-       {
-               ret = (PEN->penalties[idx]*(d_value-PEN->limits[idx-1]) + PEN->penalties[idx-1]*
-                          (PEN->limits[idx]-d_value)) / (PEN->limits[idx]-PEN->limits[idx-1]) ;  
-       }
-       
-       //fprintf(stderr,"ret=%1.2f\n", ret) ;
-
-       if (PEN->next_pen)
-               ret+=lookup_penalty(PEN->next_pen, p_value, d_values);
-       
-       //fprintf(stderr,"ret=%1.2f\n", ret) ;
-       
-       return ret ;
-}
-
-REAL lookup_penalty(const struct penalty_struct *PEN, INT p_value, 
-                                       REAL* svm_values, bool follow_next)
-{      
-       if (PEN==NULL)
-               return 0 ;
-       if (PEN->use_svm)
-               return lookup_penalty_svm(PEN, p_value, svm_values) ;
-               
-       if ((p_value<PEN->min_len) || (p_value>PEN->max_len))
-               return -CMath::INFTY ;
-       
-       if (PEN->cache!=NULL && (p_value>=0) && (p_value<=PEN->max_len))
-       {
-               REAL ret=PEN->cache[p_value] ;
-               if (PEN->next_pen && follow_next)
-                       ret+=lookup_penalty(PEN->next_pen, p_value, svm_values);
-               return ret ;
-       }
-       
-       REAL d_value = (REAL) p_value ;
-       switch (PEN->transform)
-       {
-       case T_LINEAR:
-               break ;
-       case T_LOG:
-               d_value = log(d_value) ;
-               break ;
-       case T_LOG_PLUS1:
-               d_value = log(d_value+1) ;
-               break ;
-       case T_LOG_PLUS3:
-               d_value = log(d_value+3) ;
-               break ;
-       case T_LINEAR_PLUS3:
-               d_value = d_value+3 ;
-               break ;
-       default:
-               CIO::message(M_ERROR, "unknown transform\n") ;
-               break ;
-       }
-
-       INT idx = 0 ;
-       REAL ret ;
-       for (INT i=0; i<PEN->len; i++)
-               if (PEN->limits[i]<=d_value)
-                       idx++ ;
-       
-       if (idx==0)
-               ret=PEN->penalties[0] ;
-       else if (idx==PEN->len)
-               ret=PEN->penalties[PEN->len-1] ;
-       else
-       {
-               ret = (PEN->penalties[idx]*(d_value-PEN->limits[idx-1]) + PEN->penalties[idx-1]*
-                          (PEN->limits[idx]-d_value)) / (PEN->limits[idx]-PEN->limits[idx-1]) ;  
-       }
-       //if (p_value>=30 && p_value<150)
-       //fprintf(stderr, "%s %i(%i) -> %1.2f\n", PEN->name, p_value, idx, ret) ;
-       
-       if (PEN->next_pen && follow_next)
-               ret+=lookup_penalty(PEN->next_pen, p_value, svm_values);
-
-       return ret ;
-}
diff --git a/dyn_prog/penalty_info.h b/dyn_prog/penalty_info.h
deleted file mode 100644 (file)
index 30de4aa..0000000
+++ /dev/null
@@ -1,45 +0,0 @@
-#ifndef __PENALTY_INFO_H__
-#define __PENALTY_INFO_H__
-
-#include "common.h"
-#include "Mathmatics.h"
-
-enum ETransformType
-{
-       T_LINEAR,
-       T_LOG,
-       T_LOG_PLUS1,
-       T_LOG_PLUS3,
-       T_LINEAR_PLUS3
-}  ;
-
-struct penalty_struct
-{
-       INT len ;
-       REAL *limits ;
-       REAL *penalties ;
-       INT max_len ;
-       INT min_len ;
-       REAL *cache ;
-       enum ETransformType transform ;
-       INT id ;
-       struct penalty_struct *next_pen ;
-       char * name ;
-       INT use_svm ;
-} ;
-
-void init_penalty_struct(struct penalty_struct &PEN) ;
-void delete_penalty_struct_palma(struct penalty_struct &PEN) ;
-//void delete_penalty_struct_array(struct penalty_struct *PEN, INT len) ;
-void copy_penalty_struct(struct penalty_struct *old, struct penalty_struct *newp);
-
-//#ifdef HAVE_MATLAB
-//struct penalty_struct * read_penalty_struct_from_cell(const mxArray * mx_penalty_info, mwSize &P) ;
-//#endif
-
-REAL lookup_penalty(const struct penalty_struct *PEN, INT p_value, 
-                                       REAL* svm_values, bool follow_next=true) ;
-
-enum mode { NORMAL, USE_QUALITY_SCORES};
-
-#endif
diff --git a/dyn_prog/print_align.cpp b/dyn_prog/print_align.cpp
deleted file mode 100644 (file)
index 312cabc..0000000
+++ /dev/null
@@ -1,97 +0,0 @@
-#include "fill_matrix.h"
-using namespace std;
-
-void print_align(Pre_score* matrix, int length_est,  int length_dna,Align_pair* vektor, int result_length, int print_matrix)
-  
-{
-  //Druck der matrix
-  switch(print_matrix)
-    {
-    case 1:
-      {
-       cout <<"Matrix: \n";
-       
-       for (int k = 0; k<length_est; k++)//k Zeile, l Spalte
-         {
-           cout << "Zeile " << k << ":\t"; 
-           for (int l= 0; l<length_dna; l++)
-             {
-               cout << ((Pre_score*)matrix + (k*length_dna) + l)->value <<"\t";  
-             } 
-           cout <<"\n";
-         }
-       break;  
-      }
-      
-    case 2:
-      {
-       cout <<"Matrix: \n";
-       
-       for (int k = 0; k<length_est; k++)//k Zeile, l Spalte
-         {
-           cout << "Zeile " << k << ":\t"; 
-           for (int l= 0; l<length_dna; l++)
-             {
-               if (((((Pre_score*)matrix + (k*length_dna) + l)->prev_i) == k-1) && ((((Pre_score*)matrix + (k*length_dna) + l)->prev_j) == l-1))
-                 cout << "\\" <<((Pre_score*)matrix + (k*length_dna) + l)->value <<"\t";  
-               else if (((((Pre_score*)matrix + (k*length_dna) + l)->prev_i) == k) && ((((Pre_score*)matrix + (k*length_dna) + l)->prev_j) == l-1))
-                 cout << "<"<< ((Pre_score*)matrix + (k*length_dna) + l)->value <<"\t";  
-               else if (((((Pre_score*)matrix + (k*length_dna) + l)->prev_i) == k -1) && ((((Pre_score*)matrix + (k*length_dna) + l)->prev_j) == l))
-                 cout << "|"<< ((Pre_score*)matrix + (k*length_dna) + l)->value <<"\t";  
-               else             { cout << ((Pre_score*)matrix + (k*length_dna) + l)->value << "("; 
-               cout << ((Pre_score*)matrix + (k*length_dna) + l)->prev_i << "," << ((Pre_score*)matrix + (k*length_dna) + l)->prev_j << ")\t";  
-               }
-               
-             } 
-           cout <<"\n";
-         }
-        
-       cout << "length_est" << length_est << "\n";
-        cout << "length_dna" << length_dna << "\n";
-        cout << "result_length: " << result_length << "\n";
-       
-       
-       break;  
-      } 
-      
-    case 3:
-      {
-       //Druck des Alignment
-       
-       // cout << "Alignment\n" ;
-       int j = 0;
-       // cout << "resultlength: " << result_length <<"\n";
-       while (j<result_length) {
-         int pacman = j;       
-         cout << "D:(" <<j<<")\t";
-         for (int i=0; ((j<result_length) && (i<50)); i++)
-           {
-             cout << vektor[j].dna_char << " ";        
-             j++;      
-           }
-         
-         cout << "\n"; 
-         
-         cout << "E:(" <<pacman<<")\t";
-         for (int i=0;((pacman<result_length) && (i<50)); i++)
-           {
-             cout << vektor[pacman].est_char << " "; 
-             pacman++;
-           }
-         cout << "\n\n";       
-       }               
-       
-       //Ende Druck Alignment
-       break ;
-      }
-      
-    default:
-      {
-       cout <<"";
-      }
-    }
-  
-  
-  
-}
-
diff --git a/dyn_prog/qpalma_dp.cpp b/dyn_prog/qpalma_dp.cpp
deleted file mode 100644 (file)
index f55c1e2..0000000
+++ /dev/null
@@ -1,376 +0,0 @@
-
-#include "qpalma_dp.h"
-#include <cstring>
-
-using namespace std;
-
-/*[splice_align, est_align, weightMatch, alignmentscores, dnaest] = ...
-  myalign([nr_paths], dna, est, {h}, matchmatrix, donor, acceptor, remove_duplicate_scores, ...
-  print_matrix) */
-
-Alignment::Alignment(int numQPlifs, int numq, bool use_qscores) {
-      len = 0;
-      limits = 0;
-      penalties = 0;
-      max_len = 0;
-      min_len = 0;
-      cache = 0;
-      //enum ETransformType transform = T_LINEAR;
-      id = 0;
-      name = 0;
-      use_svm = 0;
-
-      // set ptrs to zero first
-      splice_align         = 0;
-      est_align            = 0;
-      mmatrix_param        = 0;
-      alignmentscores      = 0;
-      qualityFeaturesAllPaths = 0;
-      mlen = 6; // score matrix: length of 6 for "- A C G T N"
-
-      numQualSuppPoints = numq;
-      numPlifs = numQPlifs;
-      use_quality_scores = use_qscores;
-
-      //printf("number of support points: %d\n",numQualSuppPoints);
-      //printf("number of plifs: %d\n",numPlifs );
-      FA( numQualSuppPoints >= 0 );
-      FA( numPlifs >= 0 );
-}
-
-void Alignment::getDNAEST(){}
-
-void Alignment::myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
-      int est_len_p, double* prb, double* chastity, struct penalty_struct h, double* matchmatrix, int mm_len,
-      double* donor, int d_len, double* acceptor, int a_len, struct penalty_struct* qualityScores, 
-      bool remove_duplicate_scores, bool print_matrix) {
-
-   // printf("Entering myalign...\n");
-   dna_len =  dna_len_p + 1 ;
-   est_len =  est_len_p + 1 ;
-   nr_paths = nr_paths_p;
-
-  /***************************************************************************/ 
-  // initialize alignment matrices and call fill_matrix()  
-  /***************************************************************************/
-
-  mode currentMode;
-  if (use_quality_scores) 
-   currentMode = USE_QUALITY_SCORES;
-  else
-   currentMode = NORMAL;
-
-    // dnaest
-   DNA_ARRAY = 0;
-   EST_ARRAY = 0;
-
-  //printf("[qpalma_dp] read is: %s\n",est);
-  
-  //possible donor positions
-  int nr_donor_sites = 0 ;
-  for (int ii=0; ii<d_len; ii++) {
-    if(isfinite(donor[ii])) {
-      nr_donor_sites++ ;
-    }
-  }
-
-  int* donor_sites = new int[nr_donor_sites];
-  int donor_idx = 0 ;
-  for (int ii=0; ii<d_len; ii++) {
-    if(isfinite(donor[ii])) {
-      donor_sites[donor_idx] = ii+1 ;
-      donor_idx++ ;
-    }  
-  }
-
-  int* max_score_positions = new int[nr_paths*2];
-
-  Pre_score** matrices = new Pre_score*[nr_paths];
-  //for (int z=0; z<nr_paths; z++) {
-  for (size_t  z=0; z<nr_paths; z++) {
-      matrices[z] = new Pre_score[dna_len * est_len];
-  }
-
-   //printf("qualityScores\n");
-   //for(int qidx=0;qidx<numPlifs;qidx++) {
-   //   penalty_struct p; 
-   //   p = qualityScores[qidx];
-   //   printf("%d ",qidx);
-   //   for(int pidx=0;pidx<p.len;pidx++)
-   //      printf("%f ",pidx,p.penalties[pidx]);
-   //   printf("\n");
-   //}
-
-   //int h_idx;
-   //for(h_idx=0;h_idx<h.len;h_idx++)
-   //   printf("%d %f %f\n",h_idx,h.limits[h_idx],h.penalties[h_idx]);
-  
-  //printf("calling fill_matrix...\n");
-  fill_matrix(nr_paths, matrices, est_len, dna_len, est, dna, prb, &h, matchmatrix, qualityScores, donor, acceptor, remove_duplicate_scores, nr_donor_sites, donor_sites, max_score_positions);
-  //printf("after call to fill_matrix...\n");
-
-  /***************************************************************************/ 
-  // return arguments etc.
-  /***************************************************************************/
-  int result_length; //Eine Variable fuer alle Matrizen
-
-   splice_align_size = (dna_len-1)*nr_paths;
-   est_align_size = (est_len-1)*nr_paths;
-
-   int mmatrix_size;
-
-   if (currentMode == USE_QUALITY_SCORES) {
-      mmatrix_param_size = mlen*nr_paths;
-      mmatrix_size = mlen;
-   }
-
-   if (currentMode == NORMAL) {
-      mmatrix_param_size = (mlen*mlen)*nr_paths;
-      mmatrix_size = mlen*mlen;
-   }
-   
-   alignmentscores_size = nr_paths; //alignment score for each path/matrix
-   numPathsPlifs = numPlifs*nr_paths; //alignment score for each path/matrix
-
-   splice_align         = new int[splice_align_size];
-   est_align            = new int[est_align_size];
-   mmatrix_param        = new int[mmatrix_param_size];
-   alignmentscores      = new double[nr_paths]; //alignment score for each path/matrix
-
-
-  // printf("before memset...\n");
-  memset((char*)splice_align, -1, (dna_len-1)*nr_paths*sizeof(int)); // fills splice_align with zeros
-  memset((char*)est_align, -1, (est_len-1)*nr_paths*sizeof(int)); // fills est_align with zeros
-  memset((char*)mmatrix_param, 0, mmatrix_size*nr_paths*sizeof(int)); //fills mmatrix_param with zeros
-  memset(alignmentscores, -1, nr_paths*sizeof(double)); //fills alignmentscores with zeros
-  //printf("after memset...\n");
-  
-  qualityFeaturesAllPaths= new penalty_struct*[nr_paths];
-
-  //for (int z=0; z<nr_paths; z++) {
-  for (size_t z=0; z<nr_paths; z++) {
-    result_length = 0 ;
-    
-    int* s_align = splice_align + (dna_len-1)*z;  //pointer
-    int* e_align = est_align + (est_len-1)*z ;    //pointer
-    int* mparam  = mmatrix_param + mmatrix_size*z; //pointer
-
-   qualityFeaturesAllPaths[z] = new penalty_struct[numPlifs];
-
-   int qidx, pidx;
-   for(qidx=0;qidx<numPlifs;qidx++) {
-      penalty_struct p;
-      init_penalty_struct(p);
-      p.len = numQualSuppPoints;
-      p.limits = (double*) calloc(p.len,sizeof(double));
-
-      for(pidx=0;pidx<p.len;pidx++)
-         p.limits[pidx] = qualityScores[qidx].limits[pidx];
-
-      p.penalties = (double*) calloc(p.len,sizeof(double));
-      qualityFeaturesAllPaths[z][qidx] = p;
-   }
-
-   //penalty_struct* qparam = qualityScoresAllPaths + (numPlifs*z);
-
-    //printf("before call to result_align...\n");
-    bool no_more_path = result_align(matrices, z, est_len, dna_len, &result_length, est, dna, prb, chastity, s_align, e_align, mparam, alignmentscores, max_score_positions, qualityFeaturesAllPaths[z] , currentMode);
-    //printf("after call to result_align...\n");
-
-   //printf("z is %d\n",z);
-   //int len;
-   //for(qidx=0;qidx<numPlifs;qidx++) {
-   //   penalty_struct p;
-   //   p = qualityScoresAllPaths[z][qidx];
-   //   printf("%d: ",qidx);
-   //   for(pidx=0;pidx<p.len;pidx++)
-   //      printf("%f ",p.limits[pidx]);
-   //   printf("\n");
-
-   //   for(pidx=0;pidx<p.len;pidx++)
-   //      printf("%f ",p.penalties[pidx]);
-   //   printf("\n");
-   //}
-
-   if(z==0) {
-
-   if(DNA_ARRAY != 0) {
-       delete[] DNA_ARRAY;
-       delete[] EST_ARRAY;
-    }
-
-   result_len = result_length;
-   
-   DNA_ARRAY = new int[result_length];
-   EST_ARRAY = new int[result_length];
-
-    //backtracking
-    int i = max_score_positions[2*z] ; //i (est)
-    int j = max_score_positions[2*z +1] ; //j (dna)
-    int prev_i = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_i; 
-    int prev_j = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_j;
-    int prev_m_no = ((Pre_score*)matrices[z] + i*dna_len +j)->prev_matrix_no;
-    
-    for (int ii=result_length; ii>0; ii--) {
-      if ((prev_i == (i-1)) && (prev_j == (j-1))) { // match or mismatch
-       DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
-       EST_ARRAY[ii-1] = check_char(est[i-1]) ;
-      }
-      else if ((prev_i == (i)) && (prev_j == (j-1))) {// gap on EST_ARRAY
-       DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
-       EST_ARRAY[ii-1] = 0 ; //gap
-      }
-      else if ((prev_i == (i-1)) && (prev_j == (j))) { // gap on DNA_ARRAY
-       DNA_ARRAY[ii-1] = 0 ; //gap
-       EST_ARRAY[ii-1] = check_char(est[i-1]) ;
-      }
-      else {// splice site
-       for (j; j > prev_j; j--) {
-         DNA_ARRAY[ii-1] = check_char(dna[j-1]) ;
-         EST_ARRAY[ii-1] = 6 ; //intron
-         ii-- ;
-       }
-       ii++ ; // last ii-- too much (because done in for-loop) 
-      }
-      
-      i = prev_i;
-      j = prev_j;    
-      prev_i = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_i; 
-      prev_j = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_j; 
-      prev_m_no = ((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->prev_matrix_no;
-    }
-
-    } // end of "if z == 0"
-    
-   } //end of z
-
-   for (int i=nr_paths-1;i>=0; i--)
-      delete[] matrices[i];
-}
-
-void Alignment::getAlignmentResults(int* s_align, int* e_align,
-      int* mmatrix_p, double* alignscores, double* qScores) {
-
-   size_t idx;
-   for(idx=0; idx<splice_align_size; idx++)
-      s_align[idx] = splice_align[idx];
-
-   for(idx=0; idx<est_align_size; idx++)
-      e_align[idx] =  est_align[idx];
-
-   for(idx=0; idx<mmatrix_param_size; idx++)
-      mmatrix_p[idx] = mmatrix_param[idx];
-
-   for(idx=0; idx<alignmentscores_size; idx++)
-      alignscores[idx] = alignmentscores[idx];
-   
-   if (use_quality_scores) {
-      penalty_struct currentPlif;
-      int ctr=0;
-      //for (int z=0; z<nr_paths; z++) {
-      for (size_t z=0; z<nr_paths; z++) {
-         for(int estChar=1;estChar<6;estChar++) {
-            for(int dnaChar=0;dnaChar<6;dnaChar++) {
-
-               int currentPos = (estChar-1)*6+dnaChar;
-               currentPlif = qualityFeaturesAllPaths[z][currentPos];
-
-               for(int pidx=0; pidx<currentPlif.len; pidx++) {
-                  qScores[ctr] = currentPlif.penalties[pidx];
-                  //printf("%f ",qScores[ctr]);
-                  ctr++;
-               }
-               //printf("\n");
-      }}}
-
-      //printf("\nctr is %d\n",ctr);
-   }
-   //printf("Leaving getAlignmentResults...\n");
-}
-
-void Alignment::getAlignmentArrays(int* dna_align, int* est_align) {
-
-   size_t idx;
-   for(idx=0; idx<result_len; idx++) {
-      dna_align[idx] = DNA_ARRAY[idx];
-      est_align[idx] = EST_ARRAY[idx];
-   }
-}
-
-
-PyObject* Alignment::getAlignmentResultsNew() {
-
-   PyObject* py_splice_align  = PyList_New(splice_align_size);
-   PyObject* py_est_align     = PyList_New(est_align_size);
-   PyObject* py_mmatrix       = PyList_New(mmatrix_param_size);
-   PyObject* py_align_scores  = PyList_New(alignmentscores_size);
-   PyObject* py_q_scores      = PyList_New(0);
-
-   size_t idx;
-   for(idx=0; idx<splice_align_size; idx++)
-      PyList_SetItem(py_splice_align,idx,PyInt_FromLong(splice_align[idx]));
-   //s_align[idx] = splice_align[idx];
-
-   for(idx=0; idx<est_align_size; idx++)
-      PyList_SetItem(py_est_align,idx,PyInt_FromLong(est_align[idx]));
-
-   //e_align[idx] =  est_align[idx];
-
-   for(idx=0; idx<mmatrix_param_size; idx++)
-      PyList_SetItem(py_mmatrix,idx,PyInt_FromLong(mmatrix_param[idx]));
-
-   //mmatrix_p[idx] = mmatrix_param[idx];
-
-   for(idx=0; idx<alignmentscores_size; idx++)
-      PyList_SetItem(py_align_scores,idx,PyFloat_FromDouble(alignmentscores[idx]));
-
-   //alignscores[idx] = alignmentscores[idx];
-   
-   if (use_quality_scores) {
-      penalty_struct currentPlif;
-      int ctr=0;
-      for (size_t z=0; z<nr_paths; z++) {
-      //for (int z=0; z<nr_paths; z++) {
-         for(int estChar=1;estChar<6;estChar++) {
-            for(int dnaChar=0;dnaChar<6;dnaChar++) {
-
-               int currentPos = (estChar-1)*6+dnaChar;
-               currentPlif = qualityFeaturesAllPaths[z][currentPos];
-
-               for(int pidx=0; pidx<currentPlif.len; pidx++) {
-                  //qScores[ctr] = currentPlif.penalties[pidx];
-                  //printf("%f ",qScores[ctr]);
-                  PyList_Append(py_q_scores,PyFloat_FromDouble(currentPlif.penalties[pidx]));
-                  ctr++;
-               }
-               //printf("\n");
-      }}}
-      //printf("\nctr is %d\n",ctr);
-   }
-
-   PyObject* result = PyTuple_Pack(5,
-   py_splice_align, py_est_align, py_mmatrix,
-   py_align_scores, py_q_scores);
-
-   //PyTuple_SetItem(result,0,py_splice_align);
-   //PyTuple_SetItem(result,1,py_est_align);
-   //PyTuple_SetItem(result,2,py_mmatrix);
-   //PyTuple_SetItem(result,3,py_align_scores);
-   //PyTuple_SetItem(result,4,py_q_scores);
-
-   return result;
-}
-
-PyObject* Alignment::getAlignmentArraysNew() {
-
-   PyObject* py_dna_align = PyList_New(result_len);
-   PyObject* py_est_align = PyList_New(result_len);
-
-   for(size_t idx=0; idx<result_len; idx++) {
-      PyList_SetItem(py_dna_align,idx,PyInt_FromLong(DNA_ARRAY[idx]));
-      PyList_SetItem(py_est_align,idx,PyInt_FromLong(EST_ARRAY[idx]));
-   }
-
-   PyObject* result = PyTuple_Pack(2, py_dna_align, py_est_align);
-   return result;
-}
diff --git a/dyn_prog/qpalma_dp.h b/dyn_prog/qpalma_dp.h
deleted file mode 100644 (file)
index 07c5f37..0000000
+++ /dev/null
@@ -1,146 +0,0 @@
-#ifndef _QPALMA_DP_H_
-#define _QPALMA_DP_H_
-
-#include "Python.h"
-
-#include "penalty_info.h"
-#include "debug_tools.h"
-
-struct ArrayElem { //24B
-  double score;
-  int prev_j;
-  int prev_i;
-  int prev_matrix_no;   
-  bool isSplice;
-} ;
-
-struct pre_score { //24B
-  double value; //8
-  int prev_i; //4
-  int prev_j; //4
-  int prev_matrix_no;//4
-  bool issplice;//4
-};
-
-typedef struct pre_score Pre_score;
-
-typedef struct Align_pair { //24B
-  double value ; 
-  bool issplice;
-  int previ,prevj ;
-  char est_char;
-  char dna_char;
-};
-
-void fill_matrix (int nr_paths, Pre_score* matrices[], int len_est, int len_dna, char* est, char* dna, double* prb, penalty_struct* functions, double* matchmatrix, penalty_struct* qualityScores, double* donor, double* acceptor, bool remove_duplicate_scores, int nr_donor_sites, int* donor_sites, int* max_score_positions);
-
-int check_char(char base);
-
-bool result_align (Pre_score* matrices[], int matrixnr, int i,  int j, int* result_length_ptr, char* est, char* dna, double* prb, double* chastity,  int* s_align, int* e_align, int* m_param, double* alignmentscores, int* max_score_positions, penalty_struct* qparam, mode currentMode);
-
-extern void print_align(Pre_score* matrix, int length_est,  int length_dna, Align_pair* vektor, int result_length, int print_matrix);
-
-
-/* 
- * The function myalign calculates a new alignment given the scoring scheme
- * Its input arguments are:
- *
- * num_path(id)               ->    an integer specifying which alignment has to be done
- * dna                        ->    
- * est                        ->
- * h                          ->
- * mmatrix                    -> 
- * donor                      ->
- * acceptor                   ->
- * remove_duplicate_scores    -> a boolean
- * print_matrix               -> a boolean
- *
- * [SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest] = myalign_local(...
- *
- * the idea of the qualityScores array is as follows
- *
- * consider a matrix of 24 plifs
- * 
- * -> row major
- *
- */
-
-class Alignment {
-
-   private:
-      int* splice_align;
-      int* est_align;
-      int* mmatrix_param;
-      double* alignmentscores;
-      struct penalty_struct** qualityFeaturesAllPaths;
-
-      size_t dna_len;
-      size_t est_len;
-      size_t mlen;
-      size_t nr_paths;
-
-      size_t result_len;
-      int* DNA_ARRAY;
-      int* EST_ARRAY;
-
-      int numQualSuppPoints;
-      int numPlifs;
-      bool use_quality_scores;
-
-      uint splice_align_size ;
-      uint est_align_size ;
-      uint mmatrix_param_size ;
-      uint alignmentscores_size ;
-      uint numPathsPlifs ;
-
-      INT len;
-      REAL *limits;
-      REAL *penalties;
-      INT max_len;
-      INT min_len;
-      REAL *cache;
-      enum ETransformType transform ;
-      INT id;
-      char * name;
-      INT use_svm;
-
-   public:
-      Alignment(int numQPlifs ,int numq, bool use_qscores);
-      ~Alignment() {
-         if(splice_align != 0)
-            delete[] splice_align;
-
-         if(est_align != 0)
-            delete[] est_align;
-
-         if(mmatrix_param != 0)
-            delete[] mmatrix_param;
-
-         if(alignmentscores != 0)
-            delete[] alignmentscores;
-
-         if(qualityFeaturesAllPaths != 0)
-            delete[] qualityFeaturesAllPaths;
-      }
-
-      void myalign(int nr_paths_p, char* dna, int dna_len_p, char* est,
-      int est_len_p, double* prb, double* chastity, struct penalty_struct h, 
-      double* matchmatrix, int mm_len, double* donor, int d_len, double* acceptor,
-      int a_len, struct penalty_struct* qualityScores, bool remove_duplicate_scores,
-      bool print_matrix);
-
-      void getDNAEST();
-
-      int getResultLength() { return result_len; }
-
-      void getAlignmentResults(int* s_align, int* e_align,
-      int* mmatrix_p, double* alignscores, double* qScores);
-    
-      void getAlignmentArrays(int* dna_align, int* est_align);
-
-
-      PyObject* getAlignmentResultsNew();
-      PyObject* getAlignmentArraysNew();
-};
-
-#endif  // _QPALMA_DP_H_
diff --git a/dyn_prog/result_align.cpp b/dyn_prog/result_align.cpp
deleted file mode 100644 (file)
index 2386360..0000000
+++ /dev/null
@@ -1,251 +0,0 @@
-#include "assert.h"
-#include "fill_matrix.h"
-#include "debug_tools.h"
-
-using namespace std;
-
-void increaseFeatureCount(penalty_struct* qparam, int dnaChar, int estChar, double estprb) {
-
-   FA(estChar > 0 && estChar < 6);
-   FA(dnaChar > -1 && dnaChar < 6);
-
-   int currentPos = (estChar-1)*6+dnaChar;
-
-   penalty_struct currentStruct = qparam[currentPos];
-
-   //printf("Current index %d est/dna: %d %d score %f\n",currentPos,estChar,dnaChar,estprb);
-
-   //if( estChar == 1 and dnaChar == 1) {
-   //   printf("before\n");
-   //   int p_idx;
-   //   for(p_idx=0;p_idx<currentStruct.len;p_idx++) {
-   //      printf("%f ",currentStruct.limits[p_idx]);
-   //   }
-   //   printf("\n");
-
-   //   for(p_idx=0;p_idx<currentStruct.len;p_idx++) {
-   //      printf("%f ",currentStruct.penalties[p_idx]);
-   //   }
-   //   printf("\n");
-   //}
-
-   double value = estprb;
-   int Lower = 0;
-   int idx;
-
-   for (idx=0;idx<currentStruct.len;idx++) {
-      if (currentStruct.limits[idx] <= value)
-         Lower++;
-   }
-
-   if (Lower == 0) {
-         currentStruct.penalties[0] += 1;
-         qparam[currentPos] = currentStruct;
-         return;
-   }
-
-   if (Lower == currentStruct.len) {
-         currentStruct.penalties[currentStruct.len-1] += 1;
-         qparam[currentPos] = currentStruct;
-         return;
-   }
-
-   Lower -= 1;
-   int Upper = Lower+1; // x-werte bleiben fest
-
-   double weightup  = 1.0*(value - currentStruct.limits[Lower]) / (currentStruct.limits[Upper] - currentStruct.limits[Lower]);
-   double weightlow = 1.0*(currentStruct.limits[Upper] - value) / (currentStruct.limits[Upper] - currentStruct.limits[Lower]);
-   currentStruct.penalties[Upper] += weightup;
-   currentStruct.penalties[Lower] += weightlow; 
-
-   //if( estChar == 1 and dnaChar == 1) {
-   //   printf("estprb/Lower/Upper %f %d %d\n",estprb,Lower,Upper);
-   //   int p_idx;
-   //   printf("after\n");
-   //   for(p_idx=0;p_idx<currentStruct.len;p_idx++) {
-   //      printf("%f ",currentStruct.limits[p_idx]);
-   //   }
-   //   printf("\n");
-   //   for(p_idx=0;p_idx<currentStruct.len;p_idx++)
-   //      printf("%f ",currentStruct.penalties[p_idx]);
-   //   printf("\n");
-   //}
-   qparam[currentPos] = currentStruct;
-}
-
-bool result_align(Pre_score* matrices[], int z, int est_len, int dna_len, int* result_length_ptr, char* est, char* dna, double* prb, double* chastity, int* s_align, int* e_align, int* mparam, double* alignmentscores, int* max_score_positions, penalty_struct* qparam, mode currentMode)
-//Gibt 1 zurueck, wenn kein weiterer Pfad existiert, sonst 0
-
-{  
-  const int mlen=6; // length of matchmatrix
-  int startend[4] ; // [i_start, j_start, i_end, j_end] ;
-  startend[3] = max_score_positions[2*z] ; //i (est)
-  startend[4] = max_score_positions[2*z +1] ; //j (dna)
-  //printf("in result_align: z=%d:(%d, %d)\n", z, startend[3], startend[4]) ;
-  int dnanum ; //using check_char: letter -> number
-  int estnum ; //using check_char: letter -> number
-
-  double prbnum ;
-  double chastitynum ;
-
-  int dna_pos ;
-  int est_pos ;
-  int splice_state ;
-  int est_state ;
-  for (dna_pos=dna_len-1; dna_pos>startend[4]; dna_pos--) {
-    s_align[dna_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
-  }
-  for (est_pos=est_len-1; est_pos>startend[3]; est_pos--) {
-    e_align[est_pos-1] = 4 ;
-  }
-  //splice_align: 0: exon, 1: donor, 2: acceptor, 3: intro,. 4: dangling end
-
-  /* starting point for backtracking */
-  int i = startend[3] ; 
-  int j = startend[4] ;
-  int act_m_no  = z ;
-  alignmentscores[z] = ((Pre_score*)matrices[z] + i*dna_len +j)->value;
-  //printf("alignment_score for z=%d: %3.8f\n", z, alignmentscores[z]) ;
-
-  int prev_i    = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_i; 
-  int prev_j    = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_j;
-  int prev_m_no = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_matrix_no;
-
-  //printf("Current z is %d\n",z);
-
-  if(!(isfinite(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value)))
-  {
-    cout << "No additional path left (inf)!\n";
-    return 1;
-  }
-
-  if(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value==0) {
-    cout << "No additional path left (0)!\n";
-    return 1;
-  }
-
-  assert(dna_pos == j) ;
-  assert(est_pos == i) ;
-
-  splice_state = 0 ; //exon
-  est_state = 1 ; //[112222111 ... for exons of length 2,4,3 ...
-
-  //compute length of alignment (between max(m,n) and m+n)     
-  //local_alignment: backtracking to first Zero: look at actual value
-  while(!(((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value)==0.0) {
-    
-    //just some info
-    //if ((z>0) && (!(z==prev_m_no))) {
-    //  printf("(%d,%d): next step in next matrix\n", i,j) ;
-    //}
-
-    if ((prev_i == (i-1)) && (prev_j == (j-1))) { // match or mismatch
-      (*result_length_ptr)= (*result_length_ptr) + 1;
-      
-      s_align[dna_pos-1] = splice_state; 
-      dna_pos-- ;
-      e_align[est_pos-1] = est_state ; //1 or 2, depended
-      est_pos-- ;
-      
-      dnanum = check_char(dna[j-1]) ; //-1 because index starts with 0
-      estnum = check_char(est[i-1]) ; //-1 because index starts with 0
-
-      prbnum = prb[i-1];
-      chastitynum = chastity[i-1];
-
-      //printf("Position %d,%d est,dna: %d,%d\n",i,j,estnum,dnanum);
-
-      if(currentMode == USE_QUALITY_SCORES)
-         increaseFeatureCount(qparam,dnanum,estnum,prbnum);
-      else
-         mparam[mlen*dnanum+estnum] ++ ;
-
-    }
-    else if ((prev_i == (i)) && (prev_j == (j-1))) {// gap on EST
-      (*result_length_ptr)= (*result_length_ptr) + 1;
-      
-      s_align[dna_pos-1] = splice_state; 
-      dna_pos-- ;
-
-      dnanum = check_char(dna[j-1]) ; //-1 because index starts with 0
-      estnum = 0 ; //gap
-
-      if(currentMode == USE_QUALITY_SCORES)
-         mparam[dnanum] ++ ;
-      else
-         mparam[mlen*dnanum +estnum] ++ ;
-
-    }
-    else if ((prev_i == (i-1)) && (prev_j == (j))) { // gap on DNA
-      (*result_length_ptr)= (*result_length_ptr) + 1;
-      
-      e_align[est_pos-1] = est_state ;  //1 or 2, depended
-      est_pos-- ;
-
-      dnanum = 0 ; //gap
-      estnum = check_char(est[i-1]) ; //-1 because index starts with 0
-
-      prbnum = prb[i-1];
-      chastitynum = chastity[i-1];
-
-      //printf("Position %d,%d est,dna: %d,%d\n",i,j,estnum,dnanum);
-
-      if(currentMode == USE_QUALITY_SCORES)
-         increaseFeatureCount(qparam,dnanum,estnum,prbnum);
-      else
-         mparam[mlen*dnanum +estnum] ++ ;
-    }
-    else {// splice site
-      (*result_length_ptr) =  (*result_length_ptr) + (j - prev_j);
-      if (((Pre_score*)matrices[act_m_no] + i*dna_len + j)->issplice==0) {
-       printf("matrices[act_m_no](i,j): %3.15f\n",((Pre_score*)matrices[act_m_no] + i*dna_len +j)->value) ;
-       printf("matrices[prev_m_no](i,j): %3.15f\n",((Pre_score*)matrices[prev_m_no] + i*dna_len +j)->value) ;
-       printf("issplice(act): %d\n",((Pre_score*)matrices[act_m_no] + i*dna_len + j)->issplice) ;
-       printf("issplice(prev): %d\n",((Pre_score*)matrices[prev_m_no] + i*dna_len + j)->issplice) ;
-       printf("act_m_no: %d, prev_m_no: %d\n", act_m_no, prev_m_no) ;
-       printf("j: %d -> prev_j:%d\n", j, prev_j) ;
-       printf("i: %d -> prev_i:%d\n", i, prev_i) ;
-      }
-      if (est_state==1) { //was exon labeled "1"
-       est_state = 2 ; //new exon is labeled "2"
-      }
-      else {
-       est_state = 1 ; //last exon was labeled "2", new exon is labeled "1"
-      }
-      for (j; j > prev_j; j--) {
-       if (splice_state == 0) { //coming from exon
-         splice_state = 2 ; //first intron_state: acceptor
-       } else {
-         splice_state = 3 ; //intron
-       }
-       if (j-1 == prev_j) { //last intron_state: donor
-         splice_state = 1 ;//donor
-       }
-       s_align[dna_pos-1] = splice_state; 
-       dna_pos-- ;
-      }
-      splice_state = 0 ; //exon again
-      
-    }
-   
-    startend[1] = i;
-    startend[2] = j;
-    i = prev_i;
-    j = prev_j;
-    act_m_no = prev_m_no ;
-    
-    prev_i    = ((Pre_score*)matrices[act_m_no] + i*dna_len + j)->prev_i; 
-    prev_j    = ((Pre_score*)matrices[act_m_no] + i*dna_len + j)->prev_j; 
-    prev_m_no = ((Pre_score*)matrices[act_m_no] + i*dna_len +j)->prev_matrix_no;
-  }
-  for (dna_pos; dna_pos>0; dna_pos--) {
-    s_align[dna_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
-  }
-  for (est_pos; est_pos>0; est_pos--) {
-    e_align[est_pos-1] = 4 ; // not aligned (-1 because array starts with index 0)
-  }
-
-  //printf("(%d, %d) --> (%d, %d)\n", startend[1], startend[2], startend[3], startend[4]) ;
-  //printf("result_length: %d\n", *result_length_ptr) ;
-  return 0;
-}
diff --git a/dyn_prog/test_fill_matrix.cpp b/dyn_prog/test_fill_matrix.cpp
deleted file mode 100644 (file)
index b6d47ae..0000000
+++ /dev/null
@@ -1,178 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <fstream>
-#include <sstream>
-#include <ctime>
-#include <sys/timeb.h>  
-using namespace std;
-
-#include "qpalma_dp.h"
-
-void initialize_quality_plifs(penalty_struct* qplifs, double* params) {
-
-   printf("begin of initialize_quality_plifs \n");
-   double limits[10] = {-5.0, 0.0, 5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0};
-
-   int idx, pos;
-   int ctr = 36;
-
-   for(idx=0;idx<30;idx++) {
-      penalty_struct new_plif;
-      init_penalty_struct(new_plif);
-   
-      new_plif.min_len = -5;
-      new_plif.max_len = 40;
-      new_plif.len = 10;
-
-      new_plif.limits    = (double*) malloc(sizeof(double)*10);
-      if (new_plif.limits == NULL)
-         printf("malloc (limits)\n");
-
-      new_plif.penalties = (double*) malloc(sizeof(double)*10);
-      if (new_plif.penalties == NULL)
-         printf("malloc (penalties)\n");
-
-      for(pos=0;pos<10;pos++) {
-         new_plif.limits[pos]     = limits[pos];
-         new_plif.penalties[pos]  = params[ctr];
-         ctr++;
-      }
-
-      qplifs[idx] = new_plif;
-   }
-   
-   printf("end of initialize_quality_plifs \n");
-   return;
-}
-
-
-int main(int argc, char** argv){
-
-   printf("Starting test_fm\n");
-   int idx;
-   char* alphabet = (char*) malloc(sizeof(char)*4);
-
-   alphabet[0] = 'a';
-   alphabet[1] = 'c';
-   alphabet[2] = 'g';
-   alphabet[3] = 't';
-
-   int est_len =  36;
-   int dna_len = 10000;
-   int nr_paths = 2;
-
-   int nr_donor_sites = 0 ;
-   int d_len = dna_len;
-
-   printf("begin of donor/acceptor stuff\n");
-   double* donor     = (double*) malloc(sizeof(double)*dna_len);
-   double* acceptor  = (double*) malloc(sizeof(double)*dna_len);
-
-   for(idx=0;idx<dna_len;idx++) {
-      donor[idx]     = drand48();
-      acceptor[idx]  = drand48();
-   }
-
-   for (int ii=0; ii<d_len; ii++) {
-    if(isfinite(donor[ii])) {
-      nr_donor_sites++ ;
-    }
-   }
-
-   int* donor_sites = new int[nr_donor_sites];
-   int donor_idx = 0 ;
-   for (int ii=0; ii<d_len; ii++) {
-    if(isfinite(donor[ii])) {
-      donor_sites[donor_idx] = ii+1 ;
-      donor_idx++ ;
-    }  
-   }
-
-   printf("end of donor/acceptor stuff\n");
-
-   Pre_score** matrices = new Pre_score*[nr_paths];
-   for (int z=0; z<nr_paths; z++)
-      matrices[z] = new Pre_score[dna_len * est_len];
-
-   char* est = (char*) malloc(sizeof(char)*est_len);
-   char* dna = (char*) malloc(sizeof(char)*dna_len);
-
-   for(idx=0;idx<est_len;idx++)
-      est[idx] = alphabet[lrand48()%4];
-
-   for(idx=0;idx<dna_len;idx++)
-      dna[idx] = alphabet[lrand48()%4];
-      
-   double* prb = (double*) malloc(sizeof(double)*est_len);
-
-   for(idx=0;idx<est_len;idx++)
-      prb[idx] = lrand48() % 45;
-
-   double* matchmatrix = (double*) malloc(sizeof(double)*6);
-   matchmatrix[0] = 0;
-   matchmatrix[1] = -1;
-   matchmatrix[2] = -1;
-   matchmatrix[3] = -1;
-   matchmatrix[4] = -1;
-   matchmatrix[5] = 0;
-
-   REAL limits_array[10] = {19.999999999999993, 33.362010744001154, 55.651188044142465, 92.831776672255558, 154.85273653622525, 258.30993300297655, 430.88693800637651, 718.76273276092525, 1198.968500637882, 1999.9999999999982};
-   REAL pen_array[10] = {0.024414829630412388, 0.025836097651978609, 0.17473437525993854, 0.46902269254060858, -0.14383509580738071, -0.20766155250201096, -0.32734767088988909, -0.18507220858585502, -0.097862557903448902, -0.034886887426478781};
-
-   penalty_struct functions;
-
-   init_penalty_struct(functions);
-
-   functions.min_len = 20;
-   functions.max_len = 2000;
-   functions.len = 10;
-   functions.limits = &limits_array[0];
-   functions.penalties = &pen_array[0];
-
-   penalty_struct* qualityScores = (penalty_struct*) malloc(sizeof(penalty_struct)*30);
-
-   int num_params = 336;
-   double* params = (double*) malloc(sizeof(double)*num_params);
-
-   double parameter;
-   int ctr = 0;
-
-   string s;
-   ifstream infile("param.txt");
-
-   if (infile.fail()) 
-     printf("Could not open file param.txt for reading.\n");
-
-   while (getline(infile, s)) {
-      if (s.length() == 0 || s[0] == '#') continue;
-
-      istringstream iss(s);
-      iss >> parameter;
-      params[ctr] = parameter;
-      ctr++;
-   }
-
-   infile.close();
-   
-   initialize_quality_plifs(qualityScores,params);
-
-   bool remove_duplicate_scores = true;
-   int* max_score_positions = new int[nr_paths*2];
-   mode currentMode = USE_QUALITY_SCORES;
-
-   //time_t startTime;
-   //time_t endTime;
-
-   printf("calling fill_matrix\n");
-
-   //time(&startTime);
-
-   for(idx=0;idx<1;idx++)
-      fill_matrix(nr_paths, matrices, est_len, dna_len, est, dna, prb, &functions, matchmatrix, qualityScores, donor, acceptor, remove_duplicate_scores, nr_donor_sites, donor_sites, max_score_positions);
-
-   //time(&endTime);
-
-   //printf ("Scan time: %6.3f\n", difftime(endTime,startTime));
-
-   printf("End of test_fm \n");
-}
index 9fe0598..3682cf9 100644 (file)
@@ -1,5 +1,10 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# Written (W) 2008 Fabio De Bona
+# Copyright (C) 2008 Max-Planck-Society
 
 import math
 import pdb
index d0549eb..a64226f 100644 (file)
@@ -25,6 +25,9 @@ import numpy
 
 from numpy.matlib import inf
 
+from Genefinding import doIntervalQuery
+
+
 def load_genomic(chromosome, strand, start, stop, genome, one_based=True):
    """
    This function stems from Cheng Soon Ongs genome_utils package.
diff --git a/scripts/__init__.py b/scripts/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
index 832c155..a628a00 100644 (file)
--- a/setup.py
+++ b/setup.py
 from distutils.core import setup, Extension
+from distutils.command.build_py import build_py as _build_py
 
+import subprocess
+import os
 
-setup (name = 'QPalma' 
+
+class ExtBuilder(_build_py):
+   """Enhanced 'build_py' command that includes data files with packages
+
+   The data files are specified via a 'package_data' argument to 'setup()'.
+   See 'setuptools.dist.Distribution' for more details.
+
+   Also, this version of the 'build_py' command allows you to specify both
+   'py_modules' and 'packages' in the same setup operation.
+   """
+
+   def finalize_options(self): 
+      _build_py.finalize_options(self)
+      self.package_data = self.distribution.package_data 
+      self.data_files   = self.get_data_files()
+
+
+   def run(self):
+      """Build modules, packages, and copy data files to build directory"""
+
+      print """calling run...."""
+
+      if not self.py_modules and not self.packages:
+         return
+
+      if self.py_modules:
+         self.build_modules()
+
+      if self.packages:
+         self.build_packages()
+         self.build_package_data()
+
+      # Only compile actual .py files, using our base class' idea of what our
+      # output files are.
+      self.byte_compile(_build_py.get_outputs(self,include_bytecode=0))
+
+      cwd = os.getcwd()
+
+      print '-'*60
+      print 
+      print """QPalma relies on two C++ extension modules:
+      \t- ParaParser, and
+      \t- DynProg
+      """
+      print
+
+      print "Trying to compile ParaParser..."
+
+      currentStep = 'cd ParaParser && make'
+      obj = subprocess.Popen(currentStep,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
+      out,err = obj.communicate()
+
+      if err != '':
+         print out
+         print err
+         print '-'*60
+
+      os.chdir(cwd)
+
+      print "Trying to compile DynProg..."
+
+      currentStep = 'cd DynProg && make'
+      obj = subprocess.Popen(currentStep,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
+      out,err = obj.communicate()
+
+      if err != '':
+         print out
+         print err
+         print '-'*60
+
+#   def get_data_files(self):
+#
+#        """Generate list of '(package,src_dir,build_dir,filenames)' tuples"""
+#
+#        data = []
+#
+#        for package in self.packages:
+#            # Locate package source directory
+#            src_dir = self.get_package_dir(package)
+#
+#            # Compute package build directory
+#            build_dir = os.path.join(*([self.build_lib]+package.split('.')))
+#
+#            # Length of path to strip from found files
+#            plen = len(src_dir)+1
+#
+#            # Strip directory from globbed filenames
+#            filenames = [
+#                file[plen:] for file in self.find_data_files(package, src_dir)
+#            ]
+#
+#            data.append( (package, src_dir, build_dir, filenames) )
+#
+#        return data
+#
+#
+#    def find_data_files(self, package, src_dir):
+#
+#        """Return filenames for package's data files in 'src_dir'"""
+#
+#        globs = self.package_data.get('',[])+self.package_data.get(package,[])
+#        files = []
+#
+#        for pattern in globs:
+#            # Each pattern has to be converted to a platform-specific path
+#            files.extend(glob(os.path.join(src_dir, convert_path(pattern))))
+#
+#        return files
+#
+#
+#
+#    def build_package_data(self):
+#
+#        """Copy data files into build directory"""
+#
+#        lastdir = None
+#
+#        for package, src_dir, build_dir, filenames in self.data_files:
+#
+#            for filename in filenames:
+#                target = os.path.join(build_dir,filename)
+#                self.mkpath(os.path.dirname(target))
+#                self.copy_file(os.path.join(src_dir,filename), target)
+#
+#
+#    def get_outputs(self, include_bytecode=1):
+#
+#        """Return complete list of files copied to the build directory
+#
+#        This includes both '.py' files and data files, as well as '.pyc' and
+#        '.pyo' files if 'include_bytecode' is true.  (This method is needed for
+#        the 'install_lib' command to do its job properly, and to generate a
+#        correct installation manifest.)
+#        """
+#
+#        return _build_py.get_outputs(include_bytecode) + [
+#            os.path.join(build_dir,filename)
+#                for package,src_dir,build_dir,filenames in self.data_files
+#                    for filename in filenames
+#        ]
+#
+
+setup (
+   name = 'QPalma',
    description = 'A package for optimal alignment of short reads',
    version = '1.0', 
    long_description = '''
    A
    ''', 
-   author = 'Fabio De Bona',
+   author = 'Fabio De Bona, Gunnar Raetsch',
    author_email = 'fabio@tuebingen.mpg.de',
    url = 'http://',
    license = 'GNU GPL version 3',
-   ext_package = "qpalma",
+   #ext_package = "qpalma",
    #ext_modules = extmods,
-   package_dir = {"qpalma": "python"},
-   #ext_modules=Extension('QPalmaDP',[''])
-   packages = ["qpalma"])
+   package_dir = {"qpalma": "qpalma"},
+   packages = ["qpalma","scripts"],
+   cmdclass={'build_py': ExtBuilder}
+   )