+ fixed automatic Python path settings in Makefiles
[qpalma.git] / qpalma / qpalma_main.py
1 # This program is free software; you can redistribute it and/or modify
2 # it under the terms of the GNU General Public License as published by
3 # the Free Software Foundation; either version 2 of the License, or
4 # (at your option) any later version.
5 #
6 # Written (W) 2008 Fabio De Bona
7 # Copyright (C) 2008 Max-Planck-Society
8
9 import array
10 import cPickle
11 import os.path
12 import pdb
13 import sys
14
15 import numpy
16 from numpy.matlib import mat,zeros,ones,inf
17 from numpy.linalg import norm
18
19 #from qpalma.SIQP_CPX import SIQPSolver
20 from qpalma.SIQP_CVXOPT import SIQPSolver
21
22 import QPalmaDP
23 import qpalma
24 from qpalma.computeSpliceWeights import *
25 from qpalma.set_param_palma import *
26 from qpalma.computeSpliceAlignWithQuality import *
27 from qpalma.TrainingParam import Param
28 from qpalma.Plif import Plf,compute_donacc
29
30 from qpalma.sequence_utils import SeqSpliceInfo,DataAccessWrapper,unbracket_seq
31 from qpalma.utils import pprint_alignment, get_alignment
32
33 jp = os.path.join
34 dotProd = lambda x,y: (x.T * y)[0,0]
35
36
37 class SpliceSiteException:
38 pass
39
40
41 def preprocessExample(training_set,exampleKey,seqInfo,settings):
42 """
43 This function...
44 """
45
46 currentSeqInfo,originalRead,currentQualities,currentExons = training_set[exampleKey]
47 id,chr,strand,up_cut,down_cut = currentSeqInfo
48
49 read = unbracket_seq(originalRead)
50 read = read.replace('-','')
51
52 dna, acc_supp, don_supp = seqInfo.get_seq_and_scores(chr,strand,up_cut,down_cut)
53
54 exons = currentExons - up_cut
55
56 if exons.shape == (2,2):
57 fetched_dna_subseq = dna[exons[0,0]:exons[0,1]] + dna[exons[1,0]:exons[1,1]]
58 print fetched_dna_subseq
59
60 donor_elem = dna[exons[0,1]:exons[0,1]+2]
61 acceptor_elem = dna[exons[1,0]-2:exons[1,0]]
62
63 if not ( donor_elem == 'gt' or donor_elem == 'gc' ):
64 print 'invalid donor in example %d'% exampleKey
65 raise SpliceSiteException
66
67 if not ( acceptor_elem == 'ag' ):
68 print 'invalid acceptor in example %d'% exampleKey
69 raise SpliceSiteException
70
71 assert len(fetched_dna_subseq) == len(read), pdb.set_trace()
72
73 return dna,read,acc_supp,don_supp,exons,originalRead,currentQualities
74
75
76 def performAlignment(dna,read,quality,mmatrix,donor,acceptor,ps,qualityPlifs,current_num_path,prediction_mode,settings):
77 """
78 Given the needed input this method calls the QPalma C module which
79 calculates a dynamic programming in order to obtain an alignment
80 """
81
82 prb = QPalmaDP.createDoubleArrayFromList(quality)
83 chastity = QPalmaDP.createDoubleArrayFromList([.0]*len(read))
84
85 matchmatrix = QPalmaDP.createDoubleArrayFromList(mmatrix.flatten().tolist()[0])
86 mm_len = settings['matchmatrixRows']*settings['matchmatrixCols']
87
88 don_len = len(donor)
89 donor = QPalmaDP.createDoubleArrayFromList(donor)
90 acc_len = len(acceptor)
91 acceptor = QPalmaDP.createDoubleArrayFromList(acceptor)
92
93 # Create the alignment object representing the interface to the C/C++ code.
94 currentAlignment = QPalmaDP.Alignment(settings['numQualPlifs'],settings['numQualSuppPoints'], settings['enable_quality_scores'])
95
96 c_qualityPlifs = QPalmaDP.createPenaltyArrayFromList([elem.convert2SWIG() for elem in qualityPlifs])
97
98 # calculates SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest
99 currentAlignment.myalign( current_num_path, dna, len(dna),\
100 read, len(read), prb, chastity, ps, matchmatrix, mm_len, donor, don_len,\
101 acceptor, acc_len, c_qualityPlifs, settings['remove_duplicate_scores'],\
102 settings['print_matrix'] )
103
104 if prediction_mode:
105 # part that is only needed for prediction
106 result_len = currentAlignment.getResultLength()
107 dna_array,read_array = currentAlignment.getAlignmentArraysNew()
108 else:
109 dna_array = None
110 read_array = None
111
112 newSpliceAlign, newEstAlign, newWeightMatch, newDPScores, newQualityPlifsFeatures =\
113 currentAlignment.getAlignmentResultsNew()
114
115 return newSpliceAlign, newEstAlign, newWeightMatch, newDPScores,\
116 newQualityPlifsFeatures, dna_array, read_array
117
118
119
120 class QPalma:
121 """
122 This class wraps the training and prediction functions for
123 the alignment.
124 """
125
126 def __init__(self,seqInfo,dmode=False):
127 self.ARGS = Param()
128 self.qpalma_debug_mode = dmode
129 self.seqInfo = seqInfo
130 self.logfh = None
131
132
133 def plog(self,string):
134 if self.logfh != None:
135 self.logfh.write(string)
136 self.logfh.flush()
137 else:
138 print string
139
140
141 def init_training(self,dataset_fn,training_keys,settings,set_name):
142 full_working_path = jp(settings['training_dir'],run_name)
143
144 #assert not os.path.exists(full_working_path)
145 if not os.path.exists(full_working_path):
146 os.mkdir(full_working_path)
147
148 assert os.path.exists(full_working_path)
149
150 # ATTENTION: Changing working directory
151 os.chdir(full_working_path)
152
153 self.logfh = open('_qpalma_train.log','w+')
154
155 training_set = cPickle.load(open(dataset_fn))
156
157 self.train(training_set,settings,set_name)
158
159
160 def train(self,training_set,settings,set_name):
161 """
162 The mainloop for training.
163 """
164
165 numExamples = len(training_set)
166 self.plog('Number of training examples: %d\n'% numExamples)
167
168 self.noImprovementCtr = 0
169 self.oldObjValue = 1e8
170
171 remove_duplicate_scores = settings['remove_duplicate_scores']
172 print_matrix = settings['print_matrix']
173
174 lengthSP = settings['numLengthSuppPoints']
175 donSP = settings['numDonSuppPoints']
176 accSP = settings['numAccSuppPoints']
177 mmatrixSP = settings['matchmatrixRows']*settings['matchmatrixCols']
178 numq = settings['numQualSuppPoints']
179 totalQualSP = settings['totalQualSuppPoints']
180
181 # calculate the total number of features
182 numFeatures = settings['numFeatures']
183
184 # Initialize parameter vector
185 param = numpy.matlib.rand(numFeatures,1)
186
187 # we take the first quality vector of the tuple of quality vectors
188 quality_index = 0
189
190 # no intron length model
191 if not settings['enable_intron_length']:
192 param[:lengthSP] *= 0.0
193
194 # Set the parameters such as limits penalties for the Plifs
195 [h,d,a,mmatrix,qualityPlifs] = set_param_palma(param,self.ARGS.train_with_intronlengthinformation,settings)
196
197 # Initialize solver
198 self.plog('Initializing problem...\n')
199
200 try:
201 solver = SIQPSolver(numFeatures,numExamples,settings['C'],self.logfh,settings)
202 except:
203 self.plog('Got no license. Telling queue to reschedule job...\n')
204 self.plog(sys.exc_info())
205 sys.exit(99)
206
207 #solver.enforceMonotonicity(lengthSP,lengthSP+donSP)
208 #solver.enforceMonotonicity(lengthSP+donSP,lengthSP+donSP+accSP)
209
210 # stores the number of alignments done for each example (best path, second-best path etc.)
211 num_path = [settings['anzpath']]*numExamples
212
213 currentPhi = zeros((numFeatures,1))
214 totalQualityPenalties = zeros((totalQualSP,1))
215
216 numConstPerRound = settings['numConstraintsPerRound']
217 solver_call_ctr = 0
218
219 suboptimal_example = 0
220 iteration_nr = 0
221 param_idx = 0
222 const_added_ctr = 0
223
224 featureVectors = zeros((numFeatures,numExamples))
225
226 self.plog('Starting training...\n')
227 # the main training loop
228 while True:
229 if iteration_nr == settings['iter_steps']:
230 break
231
232 #print 'Got %d examples' % len(training_set.keys())
233 for exampleIdx,example_key in enumerate(training_set.keys()):
234 print 'Current example %d' % example_key
235
236 try:
237 dna,read,acc_supp,don_supp,exons,original_read,currentQualities =\
238 preprocessExample(training_set,example_key,self.seqInfo,settings)
239 except SpliceSiteException:
240 continue
241
242 if settings['enable_quality_scores']:
243 quality = currentQualities[quality_index]
244 else:
245 quality = [40]*len(read)
246
247 if not settings['enable_splice_scores']:
248 for idx,elem in enumerate(don_supp):
249 if elem != -inf:
250 don_supp[idx] = 0.0
251
252 for idx,elem in enumerate(acc_supp):
253 if elem != -inf:
254 acc_supp[idx] = 0.0
255
256 # Berechne die Parameter des wirklichen Alignments (but with untrained d,a,h ...)
257 if settings['enable_quality_scores']:
258 trueSpliceAlign, trueWeightMatch, trueWeightQuality ,dna_calc =\
259 computeSpliceAlignWithQuality(dna, exons, read, original_read,\
260 quality, qualityPlifs,settings)
261 else:
262 trueSpliceAlign, trueWeightMatch, trueWeightQuality = computeSpliceAlignWithQuality(dna, exons)
263
264 dna_calc = dna_calc.replace('-','')
265
266
267 #print 'right before computeSpliceWeights exampleIdx %d' % exampleIdx
268 # Calculate the weights
269 trueWeightDon, trueWeightAcc, trueWeightIntron =\
270 computeSpliceWeights(d, a, h, trueSpliceAlign, don_supp, acc_supp)
271 trueWeight = numpy.vstack([trueWeightIntron, trueWeightDon, trueWeightAcc, trueWeightMatch, trueWeightQuality])
272
273 currentPhi[0:lengthSP] = mat(h.penalties[:]).reshape(lengthSP,1)
274 currentPhi[lengthSP:lengthSP+donSP] = mat(d.penalties[:]).reshape(donSP,1)
275 currentPhi[lengthSP+donSP:lengthSP+donSP+accSP] = mat(a.penalties[:]).reshape(accSP,1)
276 currentPhi[lengthSP+donSP+accSP:lengthSP+donSP+accSP+mmatrixSP] = mmatrix[:]
277
278 if settings['enable_quality_scores']:
279 totalQualityPenalties = param[-totalQualSP:]
280 currentPhi[lengthSP+donSP+accSP+mmatrixSP:] = totalQualityPenalties[:]
281
282 # Calculate w'phi(x,y) the total score of the alignment
283 trueAlignmentScore = (trueWeight.T * currentPhi)[0,0]
284
285 # The allWeights vector is supposed to store the weight parameter
286 # of the true alignment as well as the weight parameters of the
287 # num_path[exampleIdx] other alignments
288 allWeights = zeros((numFeatures,num_path[exampleIdx]+1))
289 allWeights[:,0] = trueWeight[:,0]
290
291 AlignmentScores = [0.0]*(num_path[exampleIdx]+1)
292 AlignmentScores[0] = trueAlignmentScore
293
294 ################## Calculate wrong alignment(s) ######################
295 # Compute donor, acceptor with penalty_lookup_new
296 # returns two double lists
297 donor, acceptor = compute_donacc(don_supp, acc_supp, d, a)
298
299 ps = h.convert2SWIG()
300
301 newSpliceAlign, newEstAlign, newWeightMatch, newDPScores, newQualityPlifsFeatures, unused1, unused2 =\
302 performAlignment(dna,read,quality,mmatrix,donor,acceptor,ps,qualityPlifs,num_path[exampleIdx],False,settings)
303
304 mm_len = settings['matchmatrixRows']*settings['matchmatrixCols']
305
306 # check for correct reshaping !!
307 newSpliceAlign = numpy.mat(newSpliceAlign).reshape(num_path[exampleIdx],len(dna))
308 newWeightMatch = numpy.mat(newWeightMatch).reshape(num_path[exampleIdx],mm_len)
309
310 newQualityPlifsFeatures = numpy.mat(newQualityPlifsFeatures).reshape(num_path[exampleIdx],settings['totalQualSuppPoints'])
311 # Calculate weights of the respective alignments. Note that we are calculating n-best alignments without
312 # hamming loss, so we have to keep track which of the n-best alignments correspond to the true one in order
313 # not to incorporate a true alignment in the
314 # constraints. To keep track of the true and false alignments we
315 # define an array true_map with a boolean indicating the
316 # equivalence to the true alignment for each decoded alignment.
317 true_map = [0]*(num_path[exampleIdx]+1)
318 true_map[0] = 1
319
320 for pathNr in range(num_path[exampleIdx]):
321 weightDon, weightAcc, weightIntron = computeSpliceWeights(d, a,\
322 h, newSpliceAlign[pathNr,:].flatten().tolist()[0], don_supp,\
323 acc_supp)
324
325 decodedQualityFeatures = zeros((settings['totalQualSuppPoints'],1))
326 decodedQualityFeatures = newQualityPlifsFeatures[pathNr,:].T
327 # Gewichte in restliche Zeilen der Matrix speichern
328 allWeights[:,pathNr+1] = numpy.vstack([weightIntron, weightDon, weightAcc, newWeightMatch[pathNr,:].T, decodedQualityFeatures[:]])
329
330 hpen = mat(h.penalties).reshape(len(h.penalties),1)
331 dpen = mat(d.penalties).reshape(len(d.penalties),1)
332 apen = mat(a.penalties).reshape(len(a.penalties),1)
333 features = numpy.vstack([hpen, dpen, apen, mmatrix[:], totalQualityPenalties[:]])
334
335 featureVectors[:,exampleIdx] = allWeights[:,pathNr+1]
336
337 AlignmentScores[pathNr+1] = (allWeights[:,pathNr+1].T * features)[0,0]
338
339 distinct_scores = False
340 if math.fabs(AlignmentScores[pathNr] - AlignmentScores[pathNr+1]) > 1e-5:
341 distinct_scores = True
342
343 # Check wether scalar product + loss equals viterbi score
344 if not math.fabs(newDPScores[pathNr] - AlignmentScores[pathNr+1]) <= 1e-5:
345 self.plog("Scalar prod. + loss not equals Viterbi output!\n")
346 pdb.set_trace()
347
348 self.plog(" scalar prod (correct) : %f\n"%AlignmentScores[0])
349 self.plog(" scalar prod (pred.) : %f %f\n"%(newDPScores[pathNr],AlignmentScores[pathNr+1]))
350
351 # if the pathNr-best alignment is very close to the true alignment consider it as true
352 if norm( allWeights[:,0] - allWeights[:,pathNr+1] ) < 1e-5:
353 true_map[pathNr+1] = 1
354
355 if not trueAlignmentScore <= max(AlignmentScores[1:]) + 1e-6:
356 print "suboptimal_example %d\n" %exampleIdx
357 #trueSpliceAlign, trueWeightMatch, trueWeightQuality dna_calc=\
358 #computeSpliceAlignWithQuality(dna, exons, est, original_est, quality, qualityPlifs)
359
360 #pdb.set_trace()
361 suboptimal_example += 1
362 self.plog("suboptimal_example %d\n" %exampleIdx)
363
364 # the true label sequence should not have a larger score than the maximal one WHYYYYY?
365 # this means that all n-best paths are to close to each other
366 # we have to extend the n-best search to a (n+1)-best
367 if len([elem for elem in true_map if elem == 1]) == len(true_map):
368 num_path[exampleIdx] = num_path[exampleIdx]+1
369
370 # Choose true and first false alignment for extending
371 firstFalseIdx = -1
372 for map_idx,elem in enumerate(true_map):
373 if elem == 0:
374 firstFalseIdx = map_idx
375 break
376
377 if False:
378 self.plog("Is considered as: %d\n" % true_map[1])
379 dna_array,est_array = currentAlignment.getAlignmentArraysNew()
380 _newSpliceAlign = newSpliceAlign[0].flatten().tolist()[0]
381 _newEstAlign = newEstAlign[0].flatten().tolist()[0]
382
383 # if there is at least one useful false alignment add the
384 # corresponding constraints to the optimization problem
385 if firstFalseIdx != -1:
386 firstFalseWeights = allWeights[:,firstFalseIdx]
387 differenceVector = trueWeight - firstFalseWeights
388 #pdb.set_trace()
389
390 const_added = solver.addConstraint(differenceVector, exampleIdx)
391 const_added_ctr += 1
392
393 # end of one example processing
394
395 # call solver every nth example / added constraint
396 #if exampleIdx != 0 and exampleIdx % numConstPerRound == 0:
397 if True:
398 objValue,w,self.slacks = solver.solve()
399 solver_call_ctr += 1
400
401 if solver_call_ctr == 5:
402 numConstPerRound = 200
403 self.plog('numConstPerRound is now %d\n'% numConstPerRound)
404
405 if math.fabs(objValue - self.oldObjValue) <= 1e-6:
406 self.noImprovementCtr += 1
407
408 if self.noImprovementCtr == numExamples+1:
409 break
410
411 self.oldObjValue = objValue
412 print "objValue is %f" % objValue
413
414 sum_xis = 0
415 for elem in self.slacks:
416 sum_xis += elem
417
418 print 'sum of slacks is %f'% sum_xis
419 self.plog('sum of slacks is %f\n'% sum_xis)
420
421 for i in range(len(param)):
422 param[i] = w[i]
423
424 cPickle.dump(param,open('param_%d.pickle'%param_idx,'w+'))
425 param_idx += 1
426 [h,d,a,mmatrix,qualityPlifs] =\
427 set_param_palma(param,self.ARGS.train_with_intronlengthinformation,settings)
428
429 # end of one iteration through all examples
430 self.plog("suboptimal rounds %d\n" %suboptimal_example)
431
432 if self.noImprovementCtr == numExamples*2:
433 self.FinalizeTraining(param,'param_%d.pickle'%param_idx)
434
435 iteration_nr += 1
436
437 #
438 # end of optimization
439 #
440 self.FinalizeTraining(param,'param_%d.pickle'%param_idx)
441
442
443 def FinalizeTraining(self,param,name):
444 self.plog("Training completed")
445 cPickle.dump(param,open(name,'w+'))
446 if self.logfh != None:
447 self.logfh.close()
448
449
450 ###############################################################################
451 #
452 # End of the code needed for training
453 #
454 # Begin of code for prediction
455 #
456 ###############################################################################
457
458
459 def init_prediction(self,dataset_fn,prediction_keys,settings,set_name):
460 """
461 Performing a prediction takes...
462 """
463 self.set_name = set_name
464
465 full_working_path = settings['prediction_dir']
466
467 print 'full_working_path is %s' % full_working_path
468
469 #assert not os.path.exists(full_working_path)
470 if not os.path.exists(full_working_path):
471 os.mkdir(full_working_path)
472
473 assert os.path.exists(full_working_path)
474
475 # ATTENTION: Changing working directory
476 os.chdir(full_working_path)
477
478 self.logfh = open('_qpalma_predict_%s.log'%set_name,'w+')
479
480 # number of prediction instances
481 self.plog('Number of prediction examples: %d\n'% len(prediction_keys))
482
483 # load dataset and fetch instances that shall be predicted
484 dataset = cPickle.load(open(dataset_fn))
485
486 prediction_set = {}
487 for key in prediction_keys:
488 prediction_set[key] = dataset[key]
489
490 # we do not need the full dataset anymore
491 del dataset
492
493 self.predict(prediction_set,settings)
494
495
496 def predict(self,prediction_set,settings):
497 """
498 This method...
499 """
500
501 # Load parameter vector to predict with
502 param = cPickle.load(open(settings['prediction_param_fn']))
503
504 # Set the parameters such as limits/penalties for the Plifs
505 [h,d,a,mmatrix,qualityPlifs] =\
506 set_param_palma(param,self.ARGS.train_with_intronlengthinformation,settings)
507
508 if not self.qpalma_debug_mode:
509 self.plog('Starting prediction...\n')
510
511 self.problem_ctr = 0
512
513 # where we store the predictions
514 allPredictions = []
515
516 # we take the first quality vector of the tuple of quality vectors
517 quality_index = 0
518
519 # beginning of the prediction loop
520 for example_key in prediction_set.keys():
521 print 'Current example %d' % example_key
522 for example in prediction_set[example_key]:
523
524 currentSeqInfo,read,currentQualities = example
525
526 id,chromo,strand,genomicSeq_start,genomicSeq_stop =\
527 currentSeqInfo
528
529 if not self.qpalma_debug_mode:
530 self.plog('Loading example id: %d...\n'% int(id))
531
532 if settings['enable_quality_scores']:
533 quality = currentQualities[quality_index]
534 else:
535 quality = [40]*len(read)
536
537 try:
538 currentDNASeq, currentAcc, currentDon = self.seqInfo.get_seq_and_scores(chromo,strand,genomicSeq_start,genomicSeq_stop)
539 except:
540 self.problem_ctr += 1
541 print sys.exc_info()
542 continue
543
544 if not settings['enable_splice_scores']:
545 for idx,elem in enumerate(currentDon):
546 if elem != -inf:
547 currentDon[idx] = 0.0
548
549 for idx,elem in enumerate(currentAcc):
550 if elem != -inf:
551 currentAcc[idx] = 0.0
552
553 current_prediction = self.calc_alignment(currentDNASeq, read,\
554 quality, currentDon, currentAcc, d, a, h, mmatrix, qualityPlifs,settings)
555
556 current_prediction['id'] = id
557 current_prediction['chr'] = chromo
558 current_prediction['strand'] = strand
559 current_prediction['start_pos'] = genomicSeq_start
560
561 allPredictions.append(current_prediction)
562
563 if not self.qpalma_debug_mode:
564 self.FinalizePrediction(allPredictions)
565 else:
566 return allPredictions
567
568
569 def FinalizePrediction(self,allPredictions):
570 """ End of the prediction loop we save all predictions in a pickle file and exit """
571
572 cPickle.dump(allPredictions,open('%s.predictions.pickle'%(self.set_name),'w+'))
573 self.plog('Prediction completed\n')
574 mes = 'Problem ctr %d' % self.problem_ctr
575 self.plog(mes+'\n')
576 self.logfh.close()
577
578
579 def calc_alignment(self, dna, read, quality, don_supp, acc_supp, d, a, h, mmatrix, qualityPlifs,settings):
580 """
581 Given two sequences and the parameters we calculate on alignment
582 """
583
584 donor, acceptor = compute_donacc(don_supp, acc_supp, d, a)
585
586 if '-' in read:
587 self.plog('found gap\n')
588 read = read.replace('-','')
589 assert len(read) == Conf.read_size
590
591 ps = h.convert2SWIG()
592
593 newSpliceAlign, newEstAlign, newWeightMatch, newDPScores,\
594 newQualityPlifsFeatures, dna_array, read_array =\
595 performAlignment(dna,read,quality,mmatrix,donor,acceptor,ps,qualityPlifs,1,True,settings)
596
597 mm_len = settings['matchmatrixRows']*settings['matchmatrixCols']
598
599 true_map = [0]*2
600 true_map[0] = 1
601 pathNr = 0
602
603 _newSpliceAlign = array.array('B',newSpliceAlign)
604 _newEstAlign = array.array('B',newEstAlign)
605
606 #(qStart, qEnd, tStart, tEnd, num_exons, qExonSizes, qStarts, qEnds, tExonSizes, tStarts, tEnds)
607 alignment = get_alignment(_newSpliceAlign,_newEstAlign, dna_array, read_array)
608
609 dna_array = array.array('B',dna_array)
610 read_array = array.array('B',read_array)
611
612 newExons = self.calculatePredictedExons(newSpliceAlign)
613
614 current_prediction = {'predExons':newExons, 'dna':dna, 'read':read, 'DPScores':newDPScores,\
615 'alignment':alignment,'spliceAlign':_newSpliceAlign,'estAlign':_newEstAlign,\
616 'dna_array':dna_array, 'read_array':read_array }
617
618 return current_prediction
619
620
621 def calculatePredictedExons(self,SpliceAlign):
622 newExons = []
623 oldElem = -1
624 SpliceAlign.append(-1)
625 for pos,elem in enumerate(SpliceAlign):
626 if pos == 0:
627 oldElem = -1
628 else:
629 oldElem = SpliceAlign[pos-1]
630
631 if oldElem != 0 and elem == 0: # start of exon
632 newExons.append(pos)
633
634 if oldElem == 0 and elem != 0: # end of exon
635 newExons.append(pos)
636
637 return newExons