+ extended scripts
[qpalma.git] / scripts / qpalma_main.py
index 05ec085..46ca884 100644 (file)
@@ -187,7 +187,7 @@ class QPalma:
 
       data_filename = self.run['dataset_filename']
       Sequences, Acceptors, Donors, Exons, Ests, OriginalEsts, Qualities,\
-      UpCut, AlternativeSequences =\
+      UpCut, StartPos, AlternativeSequences =\
       paths_load_data(data_filename,'training',None,self.ARGS)
 
       # Load the whole dataset 
@@ -569,7 +569,7 @@ class QPalma:
 
       data_filename = self.run['dataset_filename']
       Sequences, Acceptors, Donors, Exons, Ests, OriginalEsts, Qualities,\
-      UpCut, AlternativeSequences=\
+      UpCut, StartPos, AlternativeSequences=\
       paths_load_data(data_filename,'training',None,self.ARGS)
 
       self.Sequences   = Sequences
@@ -580,6 +580,7 @@ class QPalma:
       self.Donors      = Donors
       self.Acceptors   = Acceptors
       self.UpCut       = UpCut
+      self.StartPos    = StartPos
 
       self.AlternativeSequences = AlternativeSequences
 
@@ -623,6 +624,7 @@ class QPalma:
       Acceptors   = self.Acceptors[beg:end]
       Donors      = self.Donors[beg:end]
       UpCut       = self.UpCut[beg:end]
+      StartPos    = self.StartPos[beg:end]
       #SplitPos    = self.SplitPositions[beg:end]
 
       AlternativeSequences = self.AlternativeSequences[beg:end]
@@ -692,6 +694,8 @@ class QPalma:
 
          current_up_cut = UpCut[exampleIdx]
 
+         current_start_pos = StartPos[exampleIdx]
+
          currentAlternatives = AlternativeSequences[exampleIdx]
 
          #est = est.replace('-','')
@@ -727,6 +731,7 @@ class QPalma:
          # first make a prediction on the dna fragment which comes from the ground truth                  
          current_prediction = self.calc_alignment(dna, est, exons, quality, don_supp, acc_supp, d, a, h, mmatrix, qualityPlifs)
          current_prediction['exampleIdx'] = exampleIdx
+         current_prediction['start_pos'] = current_start_pos
 
          current_example_predictions.append(current_prediction)
 
@@ -735,10 +740,13 @@ class QPalma:
          for alternative_alignment in currentAlternatives:
             chr, strand, genomicSeq_start, genomicSeq_stop, currentLabel = alternative_alignment
             currentDNASeq, currentAcc, currentDon = get_seq_and_scores(chr,strand,genomicSeq_start,genomicSeq_stop,run['dna_flat_files'])
-            current_exons = exons - current_up_cut
 
-            current_prediction = self.calc_alignment(dna, est, exons, quality, don_supp, acc_supp, d, a, h, mmatrix, qualityPlifs)
+            current_prediction = self.calc_alignment(currentDNASeq, est, exons,\
+            quality, currentDon, currentAcc, d, a, h, mmatrix, qualityPlifs)
             current_prediction['exampleIdx'] = exampleIdx
+            current_prediction['start_pos'] = current_start_pos
+            current_prediction['alternative_start_pos'] = genomicSeq_start
+            current_prediction['label'] = currentLabel
 
             current_example_predictions.append(current_prediction)