+ changed prediction
authorfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Fri, 18 Apr 2008 17:21:27 +0000 (17:21 +0000)
committerfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Fri, 18 Apr 2008 17:21:27 +0000 (17:21 +0000)
git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@8690 e1793c9e-67f9-0310-80fc-b846ff1f7b36

scripts/qpalma_main.py

index 0e160a6..7d278cb 100644 (file)
@@ -41,7 +41,6 @@ from qpalma.compute_donacc import *
 from qpalma.TrainingParam import Param
 from qpalma.Plif import Plf
 
-from qpalma.tools.splicesites import getDonAccScores
 from qpalma.Configuration import *
 
 # these two imports are needed for the load genomic resp. interval query
@@ -99,40 +98,27 @@ def getData(SeqInfo,OriginalEsts,Exons,exampleIdx,run):
    gt_tuple_pos = [p for p,e in enumerate(dna) if p>0 and p<len(dna)-1 and e=='g' and (dna[p+1]=='t' or dna[p+1]=='c')]
    assert gt_tuple_pos == [p for p,e in enumerate(don_supp) if e != -inf and p > 0], pdb.set_trace()
 
-   # check whether the whole business is really related to invalid
-   # splice scores
-   #acc_supp = [0.0]*len(acc_supp)
-   #don_supp = [0.0]*len(don_supp)
-
    original_exons = Exons[exampleIdx]
 
    exons = original_exons - (up_cut-1)
    exons[0,0] -= 1
    exons[1,0] -= 1
 
-   fetched_dna_subseq = dna[exons[0,0]:exons[0,1]] + dna[exons[1,0]:exons[1,1]]
-   
-   donor_elem = dna[exons[0,1]:exons[0,1]+2]
-   acceptor_elem = dna[exons[1,0]-2:exons[1,0]]
-
-   if not ( donor_elem == 'gt' or donor_elem == 'gc' ):
-      print 'invalid donor in example %d'% exampleIdx
-      raise SpliceSiteException
+   if exons.shape == (2,2):
+      fetched_dna_subseq = dna[exons[0,0]:exons[0,1]] + dna[exons[1,0]:exons[1,1]]
+      
+      donor_elem = dna[exons[0,1]:exons[0,1]+2]
+      acceptor_elem = dna[exons[1,0]-2:exons[1,0]]
 
-   if not ( acceptor_elem == 'ag' ):
-      print 'invalid acceptor in example %d'% exampleIdx
-      raise SpliceSiteException
+      if not ( donor_elem == 'gt' or donor_elem == 'gc' ):
+         print 'invalid donor in example %d'% exampleIdx
+         raise SpliceSiteException
 
-   assert len(fetched_dna_subseq) == len(est), pdb.set_trace()
+      if not ( acceptor_elem == 'ag' ):
+         print 'invalid acceptor in example %d'% exampleIdx
+         raise SpliceSiteException
 
-   #new_string = ''
-   #for idx in range(len(est)):
-   #   dna_char = fetched_dna_subseq[idx] 
-   #   est_char = est[idx]
-   #   if dna_char == est_char:
-   #      new_string += est_char
-   #   else:
-   #      new_string += '[%s%s]'%(dna_char,est_char)
+      assert len(fetched_dna_subseq) == len(est), pdb.set_trace()
 
    return dna,est,acc_supp,don_supp,exons,original_est
 
@@ -348,7 +334,6 @@ class QPalma:
          sys.exit(99)
 
       #solver = None
-
       #solver.enforceMonotonicity(lengthSP,lengthSP+donSP)
       #solver.enforceMonotonicity(lengthSP+donSP,lengthSP+donSP+accSP)
 
@@ -764,12 +749,6 @@ class QPalma:
 
          dna_len = len(dna)
 
-         #if new_string != original_est:
-         #   print 'seq. inconsistency'
-         #   print new_string,original_est
-         #   print exampleIdx
-         #   continue
-
          if run['mode'] == 'normal':
             quality = [40]*len(est)
 
@@ -792,16 +771,16 @@ class QPalma:
          current_example_predictions = []
 
          # first make a prediction on the dna fragment which comes from the ground truth                  
-         current_prediction = self.calc_alignment(dna, est, exons, quality, don_supp, acc_supp, d, a, h, mmatrix, qualityPlifs)
-         current_prediction['exampleIdx'] = exampleIdx
-         current_prediction['id'] = id
-         current_prediction['start_pos']  = up_cut
-         current_prediction['label'] = True
-         current_prediction['true_cut'] = true_cut
-         current_prediction['chr'] = chr
-         current_prediction['strand'] = strand
-
-         current_example_predictions.append(current_prediction)
+         #current_prediction = self.calc_alignment(dna, est, exons, quality, don_supp, acc_supp, d, a, h, mmatrix, qualityPlifs)
+         #current_prediction['exampleIdx'] = exampleIdx
+         #current_prediction['id'] = id
+         #current_prediction['start_pos']  = up_cut
+         #current_prediction['label'] = True
+         #current_prediction['true_cut'] = true_cut
+         #current_prediction['chr'] = chr
+         #current_prediction['strand'] = strand
+
+         #current_example_predictions.append(current_prediction)
 
          # then make predictions for all dna fragments that were occurring in
          # the vmatch results