+ fixed a bug in the C++ interface
[qpalma.git] / standalone / palma / seqdict.py
1 import string
2
class predictions(object):
    """Container pairing a set of positions with their scores.

    Both attributes default to None and are stored exactly as given;
    no validation or copying is performed.
    """

    def __init__(self, positions=None, scores=None):
        """Store the given positions and scores (both optional)."""
        self.positions = positions
        self.scores = scores

    def set_positions(self, positions):
        """Replace the stored positions."""
        self.positions = positions

    def get_positions(self):
        """Return the stored positions."""
        return self.positions

    def set_scores(self, scores):
        """Replace the stored scores."""
        self.scores = scores

    def get_scores(self):
        """Return the stored scores."""
        return self.scores

    def __str__(self):
        """Return a text dump of the repr of both attributes."""
        # repr() replaces the Python-2-only backtick syntax; the output text
        # (including the missing separator before 'scores: ') is unchanged.
        return 'positions: ' + repr(self.positions) + 'scores: ' + repr(self.scores)

    def __repr__(self):
        """Return the same text as __str__."""
        return self.__str__()
22
class sequence(object):
    """A named sequence with a (start, end) range and prediction slots.

    Attributes:
        name:  the sequence name as passed in.
        seq:   the sequence data as passed in.
        start: first element of the (start, end) pair.
        end:   second element of the (start, end) pair.
        preds: dict with the keys 'acceptor' and 'donor', each initialised
               to an empty predictions() object.
    """

    def __init__(self, name, seq, start_end):
        """Create a sequence.

        Args:
            name: name of the sequence.
            seq: the sequence data; must support len().
            start_end: a (start, end) pair; must satisfy
                start < end <= len(seq).

        Raises:
            AssertionError: if the range condition does not hold.
        """
        # Tuple parameters in the signature are Python-2-only syntax
        # (removed by PEP 3113); unpack explicitly instead. Callers still
        # pass the pair as the third positional argument.
        start, end = start_end
        assert start < end <= len(seq)
        self.start = start
        self.end = end
        self.name = name
        self.seq = seq
        self.preds = dict()
        self.preds['acceptor'] = predictions()
        self.preds['donor'] = predictions()

    def __str__(self):
        """Return a one-line summary; only the first 10 items of seq are shown."""
        # repr() replaces the Python-2-only backtick syntax.
        s = "start:" + repr(self.start)
        s += " end:" + repr(self.end)
        s += " name:" + repr(self.name)
        s += " sequence:" + repr(self.seq[0:10])
        s += "... preds:" + repr(self.preds)
        return s

    def __repr__(self):
        """Return the same text as __str__."""
        return self.__str__()
43
def seqdict(dic):
    """Build a list of sequence objects from a dict of named sequences.

    Each value of `dic` (presumably the text of a FASTA record -- confirm
    against the caller) is normalised to the alphabet ACGT: a/c/g/t are
    upper-cased and every other character is replaced by 'A'. One sequence
    object spanning the whole normalised string, i.e. (0, len(seq)), is
    created per entry.

    Args:
        dic: mapping of sequence name -> sequence string.

    Returns:
        A list of sequence objects, in the iteration order of `dic`.

    Note:
        The sequence constructor asserts start < end, so an empty sequence
        string raises AssertionError.
    """
    # Local import so the function does not depend on the file's import
    # block. A regex substitution replaces the Python-2-only
    # xrange/string.translate table and runs on Python 2 and 3 alike.
    import re

    # Substitute before upper-casing so every input character maps to
    # exactly one output character.
    non_acgt = re.compile('[^ACGTacgt]')

    sequences = []
    for seqname in dic:
        seq = non_acgt.sub('A', dic[seqname]).upper()
        sequences.append(sequence(seqname, seq, (0, len(seq))))

    return sequences