+ added export_param function to export parameters to "palma definition files" (param...
author fabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Mon, 10 Dec 2007 15:00:42 +0000 (15:00 +0000)
committer fabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Mon, 10 Dec 2007 15:00:42 +0000 (15:00 +0000)
+ added parameter saving function which is called after training
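+ fixed some indices for the splice weights (in calculateWeights the zero-based adjustment of Lower is now applied only in the interpolation branch, after the boundary checks on the raw count)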

git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@6978 e1793c9e-67f9-0310-80fc-b846ff1f7b36

python/computeSpliceWeights.py
python/export_param.py [new file with mode: 0644]
python/qpalma.py

diff --git a/python/computeSpliceWeights.py b/python/computeSpliceWeights.py
index 8ed8183..7f9f048 100644 (file)
@@ -26,24 +26,24 @@ def calculateWeights(plf, scores):
    for k in range(len(scores)):
       value = scores[k]
       Lower = len([elem for elem in plf.limits if elem <= value])
-      # because we count from 0 in python
-      Lower -= 1
-      Upper = Lower+1 ; # x-werte bleiben fest
 
-      print value,Lower,Upper
 
       if Lower == 0:
          currentWeight[0] += 1
       elif Lower == len(plf.limits):
          currentWeight[-1] += 1
       else:
+         # because we count from 0 in python
+         Lower -= 1
+         Upper = Lower+1 ; # x-werte bleiben fest
+         #print value,Lower,Upper
          weightup  = 1.0*(value - plf.limits[Lower]) / (plf.limits[Upper] - plf.limits[Lower])
          weightlow = 1.0*(plf.limits[Upper] - value) / (plf.limits[Upper] - plf.limits[Lower])
          currentWeight[Upper] = currentWeight[Upper] + weightup
          currentWeight[Lower] = currentWeight[Lower] + weightlow
 
-         print plf.limits[Lower],plf.limits[Upper]
-         print weightup,weightlow,currentWeight[Upper],currentWeight[Lower]
+         #print plf.limits[Lower],plf.limits[Upper]
+         #print weightup,weightlow,currentWeight[Upper],currentWeight[Lower]
 
    return currentWeight
 
diff --git a/python/export_param.py b/python/export_param.py
new file mode 100644 (file)
index 0000000..5ddcd14
--- /dev/null
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+###########################################################
+#
+###########################################################
+
+import bz2
+
+def writeStruct(fid,plif):
+   fid.write('%s_len_limits=%s\n'%(plif.name,str(plif.limits)))
+   fid.write('%s_len_penalties=%s\n'%(plif.name,str(plif.penalties)))
+   fid.write('%s_len_bins=%d\n'%(plif.name,len(plif.limits)))
+
+   if plif.name == 'intron':
+      fid.write('%s_len_min=%d\n'%(plif.name,plif.min_len))
+      fid.write('%s_len_max=%d\n'%(plif.name,plif.max_len))
+      fid.write('%s_len_transform=%s\n'%(plif.name,plif.transform))
+
+def export_param(filename,h,d,a,mmatrix):
+
+   # Exports a bz2 file with the trained PALMA. Assumes splice sites and intron length used.
+   h.name = 'intron'
+   d.name = 'donor'
+   a.name = 'acceptor'
+
+   fid = bz2.BZ2File(filename+'.bz2','w')
+
+   fid.write('%palma definition file version: 1.0\n\n')
+   fid.write('%penalties\n');
+  
+   writeStruct(fid, h);
+   writeStruct(fid, a);
+   writeStruct(fid, d);
+
+   # substitution matrix
+   mmatrix = mmatrix.reshape(6,6)
+   fid.write('substitution_matrix=[')
+   for row in range(6):
+      if row == 5:
+         fid.write('%f, %f, %f, %f, %f, %f]\n'%(mmatrix[row,0],mmatrix[row,1],mmatrix[row,2],mmatrix[row,3],mmatrix[row,4],mmatrix[row,5]))
+      else:
+         fid.write('%f, %f, %f, %f, %f, %f;\n'%(mmatrix[row,0],mmatrix[row,1],mmatrix[row,2],mmatrix[row,3],mmatrix[row,4],mmatrix[row,5]))
+
+   fid.close()
diff --git a/python/qpalma.py b/python/qpalma.py
index 33ac32d..977d340 100644 (file)
@@ -26,6 +26,7 @@ from computeSpliceAlign import *
 from penalty_lookup_new import *
 from compute_donacc import *
 from TrainingParam import Param
+from export_param import *
 
 import Configuration
 
@@ -81,15 +82,15 @@ class QPalma:
 
       #Sequences, Acceptors, Donors, Exons, Ests, QualityScores = paths_load_data('training',self.genome_info,self.ARGS)
 
+
       # number of training instances
       N = len(Sequences) 
       self.numExamples = N
       assert N == len(Acceptors) and N == len(Acceptors) and N == len(Exons)\
       and N == len(Ests), 'The Seq,Accept,Donor,.. arrays are of different lengths'
-
       self.plog('Number of training examples: %d\n'% N)
 
-      iteration_steps = 200 ; #upper bound on iteration steps
+      iteration_steps = 10 ; #upper bound on iteration steps
 
       remove_duplicate_scores = False
       print_matrix = False
@@ -102,6 +103,7 @@ class QPalma:
       # Set the parameters such as limits penalties for the Plifs
       [h,d,a,mmatrix] = set_param_palma(param,self.ARGS.train_with_intronlengthinformation)
 
+
       # delete splicesite-score-information
       if not self.ARGS.train_with_splicesitescoreinformation:
          for i in range(len(Acceptors)):
@@ -129,7 +131,8 @@ class QPalma:
       iteration_nr = 1
 
       while True:
-         print 'Iteration step: %d'% iteration_nr
+         if iteration_nr == iteration_steps:
+            break
 
          for exampleIdx in range(self.numExamples):
             if exampleIdx% 1000 == 0:
@@ -197,39 +200,37 @@ class QPalma:
             qualityMat = QPalmaDP.createDoubleArrayFromList(qualityMatrix)
             currentAlignment.setQualityMatrix(qualityMat,self.numQualSuppPoints)
 
-            print 'PYTHON: Calling myalign...'
+            #print 'PYTHON: Calling myalign...'
             # calculates SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest
             currentAlignment.myalign( num_path[exampleIdx], dna, dna_len,\
             est, est_len, ps, matchmatrix, mm_len, donor, d_len,\
             acceptor, a_len, remove_duplicate_scores, print_matrix)
-            print 'PYTHON: After myalign call...'
+            #print 'PYTHON: After myalign call...'
 
             newSpliceAlign       = QPalmaDP.createIntArrayFromList([0]*(dna_len*num_path[exampleIdx]))
             newEstAlign          = QPalmaDP.createIntArrayFromList([0]*(est_len*num_path[exampleIdx]))
             newWeightMatch       = QPalmaDP.createIntArrayFromList([0]*(mm_len*num_path[exampleIdx]))
             newAlignmentScores   = QPalmaDP.createDoubleArrayFromList([.0]*num_path[exampleIdx])
 
-            pdb.set_trace()
-
             currentAlignment.getAlignmentResults(newSpliceAlign, newEstAlign, newWeightMatch, newAlignmentScores)
 
             spliceAlign = zeros((num_path[exampleIdx]*dna_len,1))
             weightMatch = zeros((num_path[exampleIdx]*mm_len,1))
 
-            print 'spliceAlign'
-            for i in range(dna_len*num_path[exampleIdx]):
-               spliceAlign[i] = newSpliceAlign[i]
-               print '%f' % (spliceAlign[i])
+            #print 'spliceAlign'
+            #for i in range(dna_len*num_path[exampleIdx]):
+            #   spliceAlign[i] = newSpliceAlign[i]
+            #   print '%f' % (spliceAlign[i])
 
-            print 'weightMatch'
-            for i in range(mm_len*num_path[exampleIdx]):
-               weightMatch[i] = newWeightMatch[i]
-               print '%f' % (weightMatch[i])
+            #print 'weightMatch'
+            #for i in range(mm_len*num_path[exampleIdx]):
+            #   weightMatch[i] = newWeightMatch[i]
+            #   print '%f' % (weightMatch[i])
 
             for i in range(num_path[exampleIdx]):
                AlignmentScores[i+1] = newAlignmentScores[i]
 
-            print AlignmentScores
+            #print AlignmentScores
                
             spliceAlign = spliceAlign.reshape(num_path[exampleIdx],dna_len)
             weightMatch = weightMatch.reshape(num_path[exampleIdx],mm_len)
@@ -305,12 +306,18 @@ class QPalma:
                      for elem in self.slacks:
                         sum_xis +=  elem
 
-            if exampleIdx==0:
+                  for i in range(len(param)):
+                     param[i] = w[i]
+
+                  [h,d,a,mmatrix] = set_param_palma(param,self.ARGS.train_with_intronlengthinformation)
+                  
+
+            if exampleIdx==10:
                break
 
          iteration_nr += 1
-         break
 
+      export_param('test_params',h,d,a,mmatrix)
       self.logfh.close()
       print 'Training completed'