+ extended test cases
[qpalma.git] / qpalma / qpalma_main.py
1 # This program is free software; you can redistribute it and/or modify
2 # it under the terms of the GNU General Public License as published by
3 # the Free Software Foundation; either version 2 of the License, or
4 # (at your option) any later version.
5 #
6 # Written (W) 2008 Fabio De Bona
7 # Copyright (C) 2008 Max-Planck-Society
8
9 import array
10 import cPickle
11 import os.path
12 import pdb
13 import sys
14
15 import numpy
16 from numpy.matlib import mat,zeros,ones,inf
17 from numpy.linalg import norm
18
19 #from qpalma.SIQP_CPX import SIQPSolver
20 #from qpalma.SIQP_CVXOPT import SIQPSolver
21
22 import QPalmaDP
23 import qpalma
24 from qpalma.computeSpliceWeights import *
25 from qpalma.set_param_palma import *
26 from qpalma.computeSpliceAlignWithQuality import *
27 from qpalma.TrainingParam import Param
28 from qpalma.Plif import Plf,compute_donacc
29
30 from qpalma.sequence_utils import SeqSpliceInfo,DataAccessWrapper,unbracket_seq
31 from qpalma.utils import pprint_alignment, get_alignment
32
33 jp = os.path.join
34 dotProd = lambda x,y: (x.T * y)[0,0]
35
36 class SpliceSiteException:
37 pass
38
39
40 def getData(training_set,exampleKey,settings):
41 """ This function... """
42
43 currentSeqInfo,original_est,currentQualities,currentExons = training_set[exampleKey]
44 id,chr,strand,up_cut,down_cut = currentSeqInfo
45
46 est = original_est
47 est = "".join(est)
48 est = est.lower()
49 est = unbracket_est(est)
50 est = est.replace('-','')
51
52 est_len = len(est)
53
54 original_est = "".join(original_est)
55 original_est = original_est.lower()
56
57 dna_flat_files = '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome/'
58 dna, acc_supp, don_supp = get_seq_and_scores(chr,strand,up_cut,down_cut,dna_flat_files)
59
60 #currentDNASeq, currentAcc, currentDon = seqInfo.get_seq_and_scores(chromo,strand,genomicSeq_start,genomicSeq_stop)
61
62 original_exons = currentExons
63 exons = original_exons - (up_cut-1)
64 exons[0,0] -= 1
65 exons[1,0] -= 1
66
67 if exons.shape == (2,2):
68 fetched_dna_subseq = dna[exons[0,0]:exons[0,1]] + dna[exons[1,0]:exons[1,1]]
69
70 donor_elem = dna[exons[0,1]:exons[0,1]+2]
71 acceptor_elem = dna[exons[1,0]-2:exons[1,0]]
72
73 if not ( donor_elem == 'gt' or donor_elem == 'gc' ):
74 print 'invalid donor in example %d'% exampleKey
75 raise SpliceSiteException
76
77 if not ( acceptor_elem == 'ag' ):
78 print 'invalid acceptor in example %d'% exampleKey
79 raise SpliceSiteException
80
81 assert len(fetched_dna_subseq) == len(est), pdb.set_trace()
82
83 return dna,est,acc_supp,don_supp,exons,original_est,currentQualities
84
85
86 class QPalma:
87 """
88 This class wraps the training and prediction functions for
89 the alignment.
90 """
91
92 def __init__(self,seqInfo,dmode=False):
93 self.ARGS = Param()
94 self.qpalma_debug_mode = dmode
95 self.seqInfo = seqInfo
96
97
98 def plog(self,string):
99 self.logfh.write(string)
100 self.logfh.flush()
101
102
103 def do_alignment(self,dna,est,quality,mmatrix,donor,acceptor,ps,qualityPlifs,current_num_path,prediction_mode,settings):
104 """
105 Given the needed input this method calls the QPalma C module which
106 calculates a dynamic programming in order to obtain an alignment
107 """
108
109 dna_len = len(dna)
110 est_len = len(est)
111
112 prb = QPalmaDP.createDoubleArrayFromList(quality)
113 chastity = QPalmaDP.createDoubleArrayFromList([.0]*est_len)
114
115 matchmatrix = QPalmaDP.createDoubleArrayFromList(mmatrix.flatten().tolist()[0])
116 mm_len = settings['matchmatrixRows']*settings['matchmatrixCols']
117
118 d_len = len(donor)
119 donor = QPalmaDP.createDoubleArrayFromList(donor)
120 a_len = len(acceptor)
121 acceptor = QPalmaDP.createDoubleArrayFromList(acceptor)
122
123 # Create the alignment object representing the interface to the C/C++ code.
124 currentAlignment = QPalmaDP.Alignment(settings['numQualPlifs'],settings['numQualSuppPoints'], settings['enable_quality_scores'])
125 c_qualityPlifs = QPalmaDP.createPenaltyArrayFromList([elem.convert2SWIG() for elem in qualityPlifs])
126 # calculates SpliceAlign, EstAlign, weightMatch, Gesamtscores, dnaest
127 currentAlignment.myalign( current_num_path, dna, dna_len,\
128 est, est_len, prb, chastity, ps, matchmatrix, mm_len, donor, d_len,\
129 acceptor, a_len, c_qualityPlifs, settings['remove_duplicate_scores'],\
130 settings['print_matrix'] )
131
132 if prediction_mode:
133 # part that is only needed for prediction
134 result_len = currentAlignment.getResultLength()
135 dna_array,est_array = currentAlignment.getAlignmentArraysNew()
136 else:
137 dna_array = None
138 est_array = None
139
140 newSpliceAlign, newEstAlign, newWeightMatch, newDPScores, newQualityPlifsFeatures =\
141 currentAlignment.getAlignmentResultsNew()
142
143 return newSpliceAlign, newEstAlign, newWeightMatch, newDPScores,\
144 newQualityPlifsFeatures, dna_array, est_array
145
146
147 def init_train(self,settings,run_name):
148 full_working_path = jp(settings['alignment_dir'],run_name)
149
150 #assert not os.path.exists(full_working_path)
151 if not os.path.exists(full_working_path):
152 os.mkdir(full_working_path)
153
154 assert os.path.exists(full_working_path)
155
156 # ATTENTION: Changing working directory
157 os.chdir(full_working_path)
158
159 self.logfh = open('_qpalma_train.log','w+')
160
161 train(settings)
162
163
164 def train(self,settings,training_set):
165 """
166 The mainloop for training.
167 """
168
169 numExamples = len(training_set)
170 self.plog('Number of training examples: %d\n'% numExamples)
171
172 self.noImprovementCtr = 0
173 self.oldObjValue = 1e8
174
175 remove_duplicate_scores = settings['remove_duplicate_scores']
176 print_matrix = settings['print_matrix']
177
178 lengthSP = settings['numLengthSuppPoints']
179 donSP = settings['numDonSuppPoints']
180 accSP = settings['numAccSuppPoints']
181 mmatrixSP = settings['matchmatrixRows']*settings['matchmatrixCols']
182 numq = settings['numQualSuppPoints']
183 totalQualSP = settings['totalQualSuppPoints']
184
185 # calculate the total number of features
186 numFeatures = lengthSP+donSP+accSP+mmatrixSP*numq
187
188 # Initialize parameter vector
189 param = numpy.matlib.rand(numFeatures,1)
190
191 # no intron length model
192 if not settings['enable_intron_length']:
193 param[:lengthSP] *= 0.0
194
195 # Set the parameters such as limits penalties for the Plifs
196 [h,d,a,mmatrix,qualityPlifs] = set_param_palma(param,self.ARGS.train_with_intronlengthinformation,settings)
197
198 # Initialize solver
199 self.plog('Initializing problem...\n')
200
201 try:
202 solver = SIQPSolver(numFeatures,numExamples,settings['C'],self.logfh,settings)
203 except:
204 self.plog('Got no license. Telling queue to reschedule job...\n')
205 sys.exit(99)
206
207 solver.enforceMonotonicity(lengthSP,lengthSP+donSP)
208 solver.enforceMonotonicity(lengthSP+donSP,lengthSP+donSP+accSP)
209
210 # stores the number of alignments done for each example (best path, second-best path etc.)
211 num_path = settings['anzpath']*numExamples
212
213 currentPhi = zeros((numFeatures,1))
214 totalQualityPenalties = zeros((totalQualSP,1))
215
216 numConstPerRound = settings['numConstraintsPerRound']
217 solver_call_ctr = 0
218
219 suboptimal_example = 0
220 iteration_nr = 0
221 param_idx = 0
222 const_added_ctr = 0
223
224 featureVectors = zeros((numFeatures,numExamples))
225
226 self.plog('Starting training...\n')
227 # the main training loop
228 while True:
229 if iteration_nr == settings['iter_steps']:
230 break
231
232 for exampleIdx,example_key in enumerate(training_set.keys()):
233 print 'Current example %d' % example_key
234
235 try:
236 dna,est,acc_supp,don_supp,exons,original_est,currentQualities =\
237 getData(training_set,example_key,settings)
238 except SpliceSiteException:
239 continue
240
241 if settings['enable_quality_scores']:
242 quality = currentQualities[quality_index]
243 else:
244 quality = [40]*len(read)
245
246 if not settings['enable_splice_scores']:
247 for idx,elem in enumerate(don_supp):
248 if elem != -inf:
249 don_supp[idx] = 0.0
250
251 for idx,elem in enumerate(acc_supp):
252 if elem != -inf:
253 acc_supp[idx] = 0.0
254
255
256 # Berechne die Parameter des wirklichen Alignments (but with untrained d,a,h ...)
257 if settings['enable_quality_scores']:
258 trueSpliceAlign, trueWeightMatch, trueWeightQuality ,dna_calc =\
259 computeSpliceAlignWithQuality(dna, exons, est, original_est,\
260 quality, qualityPlifs,settings)
261 else:
262 trueSpliceAlign, trueWeightMatch, trueWeightQuality = computeSpliceAlignWithQuality(dna, exons)
263
264 dna_calc = dna_calc.replace('-','')
265
266 #print 'right before computeSpliceWeights exampleIdx %d' % exampleIdx
267 # Calculate the weights
268 trueWeightDon, trueWeightAcc, trueWeightIntron =\
269 computeSpliceWeights(d, a, h, trueSpliceAlign, don_supp, acc_supp)
270 trueWeight = numpy.vstack([trueWeightIntron, trueWeightDon, trueWeightAcc, trueWeightMatch, trueWeightQuality])
271
272 currentPhi[0:lengthSP] = mat(h.penalties[:]).reshape(lengthSP,1)
273 currentPhi[lengthSP:lengthSP+donSP] = mat(d.penalties[:]).reshape(donSP,1)
274 currentPhi[lengthSP+donSP:lengthSP+donSP+accSP] = mat(a.penalties[:]).reshape(accSP,1)
275 currentPhi[lengthSP+donSP+accSP:lengthSP+donSP+accSP+mmatrixSP] = mmatrix[:]
276
277 if settings['enable_quality_scores']:
278 totalQualityPenalties = param[-totalQualSP:]
279 currentPhi[lengthSP+donSP+accSP+mmatrixSP:] = totalQualityPenalties[:]
280
281 # Calculate w'phi(x,y) the total score of the alignment
282 trueAlignmentScore = (trueWeight.T * currentPhi)[0,0]
283
284 # The allWeights vector is supposed to store the weight parameter
285 # of the true alignment as well as the weight parameters of the
286 # num_path[exampleIdx] other alignments
287 allWeights = zeros((numFeatures,num_path[exampleIdx]+1))
288 allWeights[:,0] = trueWeight[:,0]
289
290 AlignmentScores = [0.0]*(num_path[exampleIdx]+1)
291 AlignmentScores[0] = trueAlignmentScore
292
293 ################## Calculate wrong alignment(s) ######################
294 # Compute donor, acceptor with penalty_lookup_new
295 # returns two double lists
296 donor, acceptor = compute_donacc(don_supp, acc_supp, d, a)
297
298 ps = h.convert2SWIG()
299
300 newSpliceAlign, newEstAlign, newWeightMatch, newDPScores,\
301 newQualityPlifsFeatures, unneeded1, unneeded2 =\
302 self.do_alignment(dna,est,quality,mmatrix,donor,acceptor,ps,qualityPlifs,num_path[exampleIdx],False,settings)
303 mm_len = settings['matchmatrixRows']*settings['matchmatrixCols']
304
305 newSpliceAlign = newSpliceAlign.reshape(num_path[exampleIdx],len(dna))
306 newWeightMatch = newWeightMatch.reshape(num_path[exampleIdx],mm_len)
307
308 newQualityPlifsFeatures = newQualityPlifsFeatures.reshape(num_path[exampleIdx],settings['totalQualSuppPoints'])
309 # Calculate weights of the respective alignments. Note that we are calculating n-best alignments without
310 # hamming loss, so we have to keep track which of the n-best alignments correspond to the true one in order
311 # not to incorporate a true alignment in the
312 # constraints. To keep track of the true and false alignments we
313 # define an array true_map with a boolean indicating the
314 # equivalence to the true alignment for each decoded alignment.
315 true_map = [0]*(num_path[exampleIdx]+1)
316 true_map[0] = 1
317
318 for pathNr in range(num_path[exampleIdx]):
319 weightDon, weightAcc, weightIntron = computeSpliceWeights(d, a,\
320 h, newSpliceAlign[pathNr,:].flatten().tolist()[0], don_supp,\
321 acc_supp)
322
323 decodedQualityFeatures = zeros((settings['totalQualSuppPoints'],1))
324 decodedQualityFeatures = newQualityPlifsFeatures[pathNr,:].T
325 # Gewichte in restliche Zeilen der Matrix speichern
326 allWeights[:,pathNr+1] = numpy.vstack([weightIntron, weightDon, weightAcc, newWeightMatch[pathNr,:].T, decodedQualityFeatures[:]])
327
328 hpen = mat(h.penalties).reshape(len(h.penalties),1)
329 dpen = mat(d.penalties).reshape(len(d.penalties),1)
330 apen = mat(a.penalties).reshape(len(a.penalties),1)
331 features = numpy.vstack([hpen, dpen, apen, mmatrix[:], totalQualityPenalties[:]])
332
333 featureVectors[:,exampleIdx] = allWeights[:,pathNr+1]
334
335 AlignmentScores[pathNr+1] = (allWeights[:,pathNr+1].T * features)[0,0]
336
337 distinct_scores = False
338 if math.fabs(AlignmentScores[pathNr] - AlignmentScores[pathNr+1]) > 1e-5:
339 distinct_scores = True
340
341 # Check wether scalar product + loss equals viterbi score
342 if not math.fabs(newDPScores[pathNr,0] - AlignmentScores[pathNr+1]) <= 1e-5:
343 self.plog("Scalar prod. + loss not equals Viterbi output!\n")
344 pdb.set_trace()
345
346 self.plog(" scalar prod (correct) : %f\n"%AlignmentScores[0])
347 self.plog(" scalar prod (pred.) : %f %f\n"%(newDPScores[pathNr,0],AlignmentScores[pathNr+1]))
348
349 # if the pathNr-best alignment is very close to the true alignment consider it as true
350 if norm( allWeights[:,0] - allWeights[:,pathNr+1] ) < 1e-5:
351 true_map[pathNr+1] = 1
352
353 if not trueAlignmentScore <= max(AlignmentScores[1:]) + 1e-6:
354 print "suboptimal_example %d\n" %exampleIdx
355 #trueSpliceAlign, trueWeightMatch, trueWeightQuality dna_calc=\
356 #computeSpliceAlignWithQuality(dna, exons, est, original_est, quality, qualityPlifs)
357
358 #pdb.set_trace()
359 suboptimal_example += 1
360 self.plog("suboptimal_example %d\n" %exampleIdx)
361
362 # the true label sequence should not have a larger score than the maximal one WHYYYYY?
363 # this means that all n-best paths are to close to each other
364 # we have to extend the n-best search to a (n+1)-best
365 if len([elem for elem in true_map if elem == 1]) == len(true_map):
366 num_path[exampleIdx] = num_path[exampleIdx]+1
367
368 # Choose true and first false alignment for extending
369 firstFalseIdx = -1
370 for map_idx,elem in enumerate(true_map):
371 if elem == 0:
372 firstFalseIdx = map_idx
373 break
374
375 if False:
376 self.plog("Is considered as: %d\n" % true_map[1])
377
378 #result_len = currentAlignment.getResultLength()
379
380 dna_array,est_array = currentAlignment.getAlignmentArraysNew()
381
382 _newSpliceAlign = newSpliceAlign[0].flatten().tolist()[0]
383 _newEstAlign = newEstAlign[0].flatten().tolist()[0]
384
385 # if there is at least one useful false alignment add the
386 # corresponding constraints to the optimization problem
387 if firstFalseIdx != -1:
388 firstFalseWeights = allWeights[:,firstFalseIdx]
389 differenceVector = trueWeight - firstFalseWeights
390 #pdb.set_trace()
391
392 const_added = solver.addConstraint(differenceVector, exampleIdx)
393 const_added_ctr += 1
394
395 # end of one example processing
396
397 # call solver every nth example //added constraint
398 if exampleIdx != 0 and exampleIdx % numConstPerRound == 0:
399 objValue,w,self.slacks = solver.solve()
400 solver_call_ctr += 1
401
402 if solver_call_ctr == 5:
403 numConstPerRound = 200
404 self.plog('numConstPerRound is now %d\n'% numConstPerRound)
405
406 if math.fabs(objValue - self.oldObjValue) <= 1e-6:
407 self.noImprovementCtr += 1
408
409 if self.noImprovementCtr == numExamples+1:
410 break
411
412 self.oldObjValue = objValue
413 print "objValue is %f" % objValue
414
415 sum_xis = 0
416 for elem in self.slacks:
417 sum_xis += elem
418
419 print 'sum of slacks is %f'% sum_xis
420 self.plog('sum of slacks is %f\n'% sum_xis)
421
422 for i in range(len(param)):
423 param[i] = w[i]
424
425 cPickle.dump(param,open('param_%d.pickle'%param_idx,'w+'))
426 param_idx += 1
427 [h,d,a,mmatrix,qualityPlifs] =\
428 set_param_palma(param,self.ARGS.train_with_intronlengthinformation,settings)
429
430 ##############################################
431 # end of one iteration through all examples #
432 ##############################################
433
434 self.plog("suboptimal rounds %d\n" %suboptimal_example)
435
436 if self.noImprovementCtr == numExamples*2:
437 FinalizeTraining(param,'param_%d.pickle'%param_idx)
438
439 iteration_nr += 1
440
441 #
442 # end of optimization
443 #
444 FinalizeTraining(param,'param_%d.pickle'%param_idx)
445
446
447 def FinalizeTraining(self,vector,name):
448 self.plog("Training completed")
449 cPickle.dump(param,open(name,'w+'))
450 self.logfh.close()
451
452
453 ###############################################################################
454 #
455 # End of the code needed for training
456 #
457 # Begin of code for prediction
458 #
459 ###############################################################################
460
461
462 def init_prediction(self,dataset_fn,prediction_keys,settings,set_name):
463 """
464 Performing a prediction takes...
465 """
466 self.set_name = set_name
467
468 full_working_path = settings['prediction_dir']
469
470 print 'full_working_path is %s' % full_working_path
471
472 #assert not os.path.exists(full_working_path)
473 if not os.path.exists(full_working_path):
474 os.mkdir(full_working_path)
475
476 assert os.path.exists(full_working_path)
477
478 # ATTENTION: Changing working directory
479 os.chdir(full_working_path)
480
481 self.logfh = open('_qpalma_predict_%s.log'%set_name,'w+')
482
483 # number of prediction instances
484 self.plog('Number of prediction examples: %d\n'% len(prediction_keys))
485
486 # load dataset and fetch instances that shall be predicted
487 dataset = cPickle.load(open(dataset_fn))
488
489 prediction_set = {}
490 for key in prediction_keys:
491 prediction_set[key] = dataset[key]
492
493 # we do not need the full dataset anymore
494 del dataset
495
496 self.predict(prediction_set,settings)
497
498
499 def predict(self,prediction_set,settings):
500 """
501 This method...
502 """
503
504 # Load parameter vector to predict with
505 param = cPickle.load(open(settings['prediction_param_fn']))
506
507 # Set the parameters such as limits/penalties for the Plifs
508 [h,d,a,mmatrix,qualityPlifs] =\
509 set_param_palma(param,self.ARGS.train_with_intronlengthinformation,settings)
510
511 if not self.qpalma_debug_mode:
512 self.plog('Starting prediction...\n')
513
514 self.problem_ctr = 0
515
516 # where we store the predictions
517 allPredictions = []
518
519 # we take the first quality vector of the tuple of quality vectors
520 quality_index = 0
521
522 # beginning of the prediction loop
523 for example_key in prediction_set.keys():
524 print 'Current example %d' % example_key
525 for example in prediction_set[example_key]:
526
527 currentSeqInfo,read,currentQualities = example
528
529 id,chromo,strand,genomicSeq_start,genomicSeq_stop =\
530 currentSeqInfo
531
532 if not self.qpalma_debug_mode:
533 self.plog('Loading example id: %d...\n'% int(id))
534
535 if settings['enable_quality_scores']:
536 quality = currentQualities[quality_index]
537 else:
538 quality = [40]*len(read)
539
540 try:
541 currentDNASeq, currentAcc, currentDon = self.seqInfo.get_seq_and_scores(chromo,strand,genomicSeq_start,genomicSeq_stop)
542 except:
543 self.problem_ctr += 1
544 print sys.exc_info()
545 continue
546
547 if not settings['enable_splice_scores']:
548 for idx,elem in enumerate(currentDon):
549 if elem != -inf:
550 currentDon[idx] = 0.0
551
552 for idx,elem in enumerate(currentAcc):
553 if elem != -inf:
554 currentAcc[idx] = 0.0
555
556 current_prediction = self.calc_alignment(currentDNASeq, read,\
557 quality, currentDon, currentAcc, d, a, h, mmatrix, qualityPlifs,settings)
558
559 current_prediction['id'] = id
560 current_prediction['chr'] = chromo
561 current_prediction['strand'] = strand
562 current_prediction['start_pos'] = genomicSeq_start
563
564 allPredictions.append(current_prediction)
565
566 if not self.qpalma_debug_mode:
567 self.FinalizePrediction(allPredictions)
568 else:
569 return allPredictions
570
571
572 def FinalizePrediction(self,allPredictions):
573 """ End of the prediction loop we save all predictions in a pickle file and exit """
574
575 cPickle.dump(allPredictions,open('%s.predictions.pickle'%(self.set_name),'w+'))
576 self.plog('Prediction completed\n')
577 mes = 'Problem ctr %d' % self.problem_ctr
578 self.plog(mes+'\n')
579 self.logfh.close()
580
581
582 def calc_alignment(self, dna, read, quality, don_supp, acc_supp, d, a, h, mmatrix, qualityPlifs,settings):
583 """
584 Given two sequences and the parameters we calculate on alignment
585 """
586
587 donor, acceptor = compute_donacc(don_supp, acc_supp, d, a)
588
589 if '-' in read:
590 self.plog('found gap\n')
591 read = read.replace('-','')
592 assert len(read) == Conf.read_size
593
594 ps = h.convert2SWIG()
595
596 newSpliceAlign, newEstAlign, newWeightMatch, newDPScores,\
597 newQualityPlifsFeatures, dna_array, read_array =\
598 self.do_alignment(dna,read,quality,mmatrix,donor,acceptor,ps,qualityPlifs,1,True,settings)
599
600 mm_len = settings['matchmatrixRows']*settings['matchmatrixCols']
601
602 true_map = [0]*2
603 true_map[0] = 1
604 pathNr = 0
605
606 _newSpliceAlign = array.array('B',newSpliceAlign)
607 _newEstAlign = array.array('B',newEstAlign)
608
609 #(qStart, qEnd, tStart, tEnd, num_exons, qExonSizes, qStarts, qEnds, tExonSizes, tStarts, tEnds)
610 alignment = get_alignment(_newSpliceAlign,_newEstAlign, dna_array, read_array)
611
612 dna_array = array.array('B',dna_array)
613 read_array = array.array('B',read_array)
614
615 newExons = self.calculatePredictedExons(newSpliceAlign)
616
617 current_prediction = {'predExons':newExons, 'dna':dna, 'read':read, 'DPScores':newDPScores,\
618 'alignment':alignment,'spliceAlign':_newSpliceAlign,'estAlign':_newEstAlign,\
619 'dna_array':dna_array, 'read_array':read_array }
620
621 return current_prediction
622
623
624 def calculatePredictedExons(self,SpliceAlign):
625 newExons = []
626 oldElem = -1
627 SpliceAlign.append(-1)
628 for pos,elem in enumerate(SpliceAlign):
629 if pos == 0:
630 oldElem = -1
631 else:
632 oldElem = SpliceAlign[pos-1]
633
634 if oldElem != 0 and elem == 0: # start of exon
635 newExons.append(pos)
636
637 if oldElem == 0 and elem != 0: # end of exon
638 newExons.append(pos)
639
640 return newExons