git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@8599 e1793c9e...
[qpalma.git] / scripts / perform_remapping.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import sys
5 import pdb
6 import subprocess
7 import os.path
8
9 jp=os.path.join
10
11 class Remapping:
12 """
13 This class wraps the use of SOAP - "Short Oligonucleotide Alignment Program"
14 ( http://soap.genomics.org.cn/ )
15
16 """
17
18 soap_binary = '/fml/ag-raetsch/home/fabio/projects/soap_1.07/soap'
19
20 def __init__(self,reads,chromos,result_d):
21 self.reads = reads
22 self.chromosomes = chromos
23 self.result_dir = result_d
24
25 self.max_mismatch = 1
26 self.max_equal_hits = 100
27 self.max_gap_size = 6000
28 self.seed_size = 5
29
30
31 def performRemapping(self):
32 for currentChr,chromId in self.chromosomes:
33 result_filename ='soap_r2_v%d_w%d_g%d_s%d_%s.result' %\
34 (self.max_mismatch,self.max_equal_hits,self.max_gap_size,self.seed_size,chromId)
35
36 result = jp(self.result_dir,result_filename)
37
38 cmd = '%s -a %s -d %s -o %s' % (self.soap_binary,self.reads,currentChr,result)
39 cmd +=' -r 2 -v %d -w %d -g %d -s %d' %\
40 (self.max_mismatch,self.max_equal_hits,self.max_gap_size,self.seed_size)
41
42 #os.system('echo \"%s\" | qsub -l h_vmem=5.0G -cwd -j y -N \"%s.log\"'%(cmd,chromId))
43 obj = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
44 out,err = obj.communicate()
45 print out
46 print err
47
48
49 if __name__ == '__main__':
50 reads='/fml/ag-raetsch/share/projects/qpalma/solexa/allFilteredReads.fa'
51 # we expect a list of tuples where each tuple consists of a chromosome dna flat
52 # file location and a descriptor to name the result file for this chromosome
53 dir = '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome/'
54 chromo = [jp(dir,elem) for elem in [('chr%d.dna.fa'%idx) for idx in range(1,6)]]
55 ids = ['chr%d'%idx for idx in range(1,6)]
56 chromosomes = zip(chromo,ids)
57 #chromosomes=[('/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome/chr1.dna.fa','chr1')]
58 result_dir = '/fml/ag-raetsch/share/projects/qpalma/solexa/soap_results/'
59
60 remap1 = Remapping(reads,chromosomes,result_dir)
61 remap1.performRemapping()