+ added first version of the heuristic for the mapping pipeline.
[qpalma.git] / scripts / PipelineHeuristic.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import cPickle
5 import sys
6 import pydb
7 import pdb
8 import os
9 import os.path
10 import math
11
12
13 class PipelineHeuristic:
14 """
15 This class wraps the filter which decides whether an alignment found by
16 vmatch is spliced an should be then newly aligned using QPalma or not.
17 """
18
19 def __init__(self,filename):
20 self.data_filename = filename
21
22
23 def filter(self):
24 SeqInfo, Exons, OriginalEsts, Qualities,\
25 AlternativeSequences = paths_load_data(data_filename,'training',None,self.ARGS)
26
27 for idx in range(len(SeqInfo)):
28 currentVMatchAligment
29 vMatchScore = calcAlignmentScore(currentVMatchAligment)
30
31 alternativeAlignments = calcAlternativeAligments(currentVMatchAligment)
32
33 maxScore = 0.0
34 maxAligment = None
35 for currentAlternative in alternativeAlignments:
36 if currentScore > maxScore:
37 maxScore = alternativeScores
38 maxAlignment = currentAlternative
39
40 currentScore =calcAlignmentScore(currentAlternative))
41
42
43 # Seems that according to our learned parameters VMatch found a good
44 # alignment of the current read
45 if maxScore < vMatchScore:
46 pass
47 # We found an alternative aligment considering splice sites that scores
48 # higher than the VMatch alignment
49 else:
50 pass
51
52
53
54
55 def calcAlignmentScore(self,dna,exons,est):
56 """
57 Given an alignment (dna,exons,est) and the current parameter for QPalma
58 this function calculates the dot product of the feature representation of
59 the alignment and the parameter vector i.e the alignment score.
60 """
61
62 currentPhi = zeros((run['numFeatures'],1))
63
64 # Berechne die Parameter des wirklichen Alignments (but with untrained d,a,h ...)
65 trueSpliceAlign, trueWeightMatch, trueWeightQuality ,dna_calc =\
66 computeSpliceAlignWithQuality(dna, exons, est, original_est,\
67 quality, qualityPlifs, run)
68
69 # Calculate the weights
70 trueWeightDon, trueWeightAcc, trueWeightIntron =\
71 computeSpliceWeights(d, a, h, trueSpliceAlign, don_supp, acc_supp)
72 trueWeight = numpy.vstack([trueWeightIntron, trueWeightDon, trueWeightAcc, trueWeightMatch, trueWeightQuality])
73
74 currentPhi[0:lengthSP] = mat(h.penalties[:]).reshape(lengthSP,1)
75 currentPhi[lengthSP:lengthSP+donSP] = mat(d.penalties[:]).reshape(donSP,1)
76 currentPhi[lengthSP+donSP:lengthSP+donSP+accSP] = mat(a.penalties[:]).reshape(accSP,1)
77 currentPhi[lengthSP+donSP+accSP:lengthSP+donSP+accSP+mmatrixSP] = mmatrix[:]
78
79 if run['mode'] == 'using_quality_scores':
80 totalQualityPenalties = param[-totalQualSP:]
81 currentPhi[lengthSP+donSP+accSP+mmatrixSP:] = totalQualityPenalties[:]
82
83 # Calculate w'phi(x,y) the total score of the alignment
84 return (trueWeight.T * currentPhi)[0,0]
85
86
87
88 if __name__ == '__main__':
89 filename = sys.argv[1]
90 out_filename = sys.argv[2]
91 ph1 = PipelineHeuristic(filename)
92 ph1.filter()