+ added information on cut positions of spliced reads
[qpalma.git] / scripts / PipelineHeuristic.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import cPickle
5 import sys
6 import pydb
7 import pdb
8 import os
9 import os.path
10 import math
11
12
13 class PipelineHeuristic:
14 """
15 This class wraps the filter which decides whether an alignment found by
16 vmatch is spliced an should be then newly aligned using QPalma or not.
17 """
18
19 def __init__(self,filename):
20 self.data_filename = filename
21
22
23 def filter(self):
24 SeqInfo, Exons, OriginalEsts, Qualities,\
25 AlternativeSequences = paths_load_data(data_filename,'training',None,self.ARGS)
26
27 for idx in range(len(SeqInfo)):
28 currentVMatchAlignment
29 vMatchScore = calcAlignmentScore(currentVMatchAlignment)
30
31 alternativeAlignments = calcAlternativeAlignments(currentVMatchAlignment)
32
33 maxScore = 0.0
34 maxAlignment = None
35 for currentAlternative in alternativeAlignments:
36 if currentScore > maxScore:
37 maxScore = alternativeScores
38 maxAlignment = currentAlternative
39
40 currentScore = calcAlignmentScore(currentAlternative))
41
42 # Seems that according to our learned parameters VMatch found a good
43 # alignment of the current read
44 if maxScore < vMatchScore:
45 pass
46 # We found an alternative alignment considering splice sites that scores
47 # higher than the VMatch alignment
48 else:
49 pass
50
51
52
53 def calcAlternativeAlignments(self,currentVMatchAlignment):
54 """
55 Given an alignment proposed by Vmatch this function calculates possible
56 alternative alignments taking into account for example matched
57 donor/acceptor positions.
58 """
59
60 currentVMatchAlignment
61
62
63 return alternativeAlignments
64
65
66 def calcAlignmentScore(self,dna,exons,est):
67 """
68 Given an alignment (dna,exons,est) and the current parameter for QPalma
69 this function calculates the dot product of the feature representation of
70 the alignment and the parameter vector i.e the alignment score.
71 """
72
73 currentPhi = zeros((run['numFeatures'],1))
74
75 # Berechne die Parameter des wirklichen Alignments (but with untrained d,a,h ...)
76 trueSpliceAlign, trueWeightMatch, trueWeightQuality ,dna_calc =\
77 computeSpliceAlignWithQuality(dna, exons, est, original_est,\
78 quality, qualityPlifs, run)
79
80 # Calculate the weights
81 trueWeightDon, trueWeightAcc, trueWeightIntron =\
82 computeSpliceWeights(d, a, h, trueSpliceAlign, don_supp, acc_supp)
83 trueWeight = numpy.vstack([trueWeightIntron, trueWeightDon, trueWeightAcc, trueWeightMatch, trueWeightQuality])
84
85 currentPhi[0:lengthSP] = mat(h.penalties[:]).reshape(lengthSP,1)
86 currentPhi[lengthSP:lengthSP+donSP] = mat(d.penalties[:]).reshape(donSP,1)
87 currentPhi[lengthSP+donSP:lengthSP+donSP+accSP] = mat(a.penalties[:]).reshape(accSP,1)
88 currentPhi[lengthSP+donSP+accSP:lengthSP+donSP+accSP+mmatrixSP] = mmatrix[:]
89
90 if run['mode'] == 'using_quality_scores':
91 totalQualityPenalties = param[-totalQualSP:]
92 currentPhi[lengthSP+donSP+accSP+mmatrixSP:] = totalQualityPenalties[:]
93
94 # Calculate w'phi(x,y) the total score of the alignment
95 return (trueWeight.T * currentPhi)[0,0]
96
97
98
99 if __name__ == '__main__':
100 filename = sys.argv[1]
101 out_filename = sys.argv[2]
102 ph1 = PipelineHeuristic(filename)
103 ph1.filter()