added specific config files
author Richard <richard.neher@tuebingen.mpg.de>
Wed, 25 Sep 2013 09:10:09 +0000 (11:10 +0200)
committer Richard <richard.neher@tuebingen.mpg.de>
Wed, 25 Sep 2013 09:10:09 +0000 (11:10 +0200)
configFile.py
configFile_454_run1.py [new file with mode: 0644]
configFile_454_run2.py [new file with mode: 0644]
src/p1_trim_and_filter.py
src/p3_cluster_align.py
src/p4_consensus.py

diff --git a/configFile.py b/configFile.py
index a0349db..d6c66f5 100644 (file)
@@ -6,16 +6,18 @@
 # 454 #
 #######
 cfg={
+    'runid':'454_subsample',
     'p5_virus_match':'GTAGCATGACAAAAATCTTAGAGCC',
     'p3_virus_match':'CATTRCTTTGGATGGGTATGAA',
     'barcodes':['ACG','CGT','TAC'],
-    'input_data_file':'../data/rawdata_reg2.fsa',
+#    'input_data_file':'../data/rawdata_reg2.fsa',
+    'input_data_file':'data/subsample.fasta',
     'p5_cutoff': 21,#=len(p5_virus_match)-4
     'p3_cutoff': 18,#=len(p3_virus_match)-4
     'min_occ_same_pid':1,
     'min_length_pid':10,
-    'reverse'=True#,
-    #'barcode_length_range':range(3,5)
+    'reverse':True,
+    'barcode_length_range':range(3,5)
 }
 
 ##############
@@ -26,10 +28,10 @@ cfg={
 #    'p5_virus_match':'TGGCAGTCTAGCAGAAGAAG',
 #    'p3_virus_match':'CCTCAGGAGGGGACCCAG',
 #    'barcodes':['TACG','ACGT','CGTA','GTAC'],
-#    'input_data_file':'data/subsample_100000_iontorrent.fastq',
+#    'input_data_file':'data/subsample_iontorrent.fastq',
 #    'reverse':False,
 #    'min_occ_same_pid':1,
 #    'min_length_pid':8,
 #    'barcode_length_range':range(3,5)
 #}
-
+#
diff --git a/configFile_454_run1.py b/configFile_454_run1.py
new file mode 100644 (file)
index 0000000..3314898
--- /dev/null
@@ -0,0 +1,22 @@
+###
+# Configuration parameters for: p1_trim_and_filter
+###
+
+#######
+# 454 #
+#######
+cfg={
+    'runid':'454_subsample_run1',
+    'p5_virus_match':'GTAGCATGACAAAAATCTTAGAGCC',
+    'p3_virus_match':'CATTRCTTTGGATGGGTATGAA',
+    'barcodes':['ACG','CAG','GTA', 'GTC'],
+#    'input_data_file':'../data/rawdata_reg2.fsa',
+    'input_data_file':'data/subsample.fasta',
+    'p5_cutoff': 21,#=len(p5_virus_match)-4
+    'p3_cutoff': 18,#=len(p3_virus_match)-4
+    'min_occ_same_pid':1,
+    'min_length_pid':10,
+    'reverse':True,
+    'barcode_length_range':range(3,5)
+}
+
diff --git a/configFile_454_run2.py b/configFile_454_run2.py
new file mode 100644 (file)
index 0000000..445422c
--- /dev/null
@@ -0,0 +1,22 @@
+###
+# Configuration parameters for: p1_trim_and_filter
+###
+
+#######
+# 454 #
+#######
+cfg={
+    'runid':'454_subsample_run2',
+    'p5_virus_match':'GTAGCATGACAAAAATCTTAGAGCC',
+    'p3_virus_match':'CATTRCTTTGGATGGGTATGAA',
+    'barcodes':['ACG','CGT','TAC'],
+#    'input_data_file':'../data/rawdata_reg2.fsa',
+    'input_data_file':'data/subsample.fasta',
+    'p5_cutoff': 21,#=len(p5_virus_match)-4
+    'p3_cutoff': 18,#=len(p3_virus_match)-4
+    'min_occ_same_pid':1,
+    'min_length_pid':10,
+    'reverse':True,
+    'barcode_length_range':range(3,5)
+}
+
diff --git a/src/p1_trim_and_filter.py b/src/p1_trim_and_filter.py
index d5914c3..e944905 100755 (executable)
@@ -67,7 +67,7 @@ class struct_var_set:
                 if 'barcode_length_range' in cfg:
                     self.bc_length_range = cfg['barcode_length_range']
                 else:
-                    self.bc_length_range = set(len, self.barcodes)
+                    self.bc_length_range = set(map(len, self.barcodes))
                 # ADDITIONAL variables for trimming and filtering:
                 self.count = 0
                 
@@ -141,6 +141,7 @@ def filter_reads(res):
     time_start = time.time()
     with open(str(res.input_data_file), 'r') as seq_file:
         file_format = res.input_data_file.split('.')[-1]
+        print('opened file '+res.input_data_file+' '+file_format+'\n')
         for record in SeqIO.parse(seq_file, file_format):
             tmp_seq = str(record.seq)
             L = len(tmp_seq)
@@ -184,7 +185,7 @@ def filter_reads(res):
         print "---#good_reads: " + str(len(res.good_reads[bc]))
         
     print 'Total:'
-    print res.count, [len(res.good_reads[bc]) for bc in res.barcodes], [int(np.sum(res.good_read_bad_pID_length[bc])) for bc in res.barcodes]
+    print res.count, [len(res.good_reads[bc]) for bc in res.barcodes], [int(np.sum(res.good_read_bad_pID_length[bc])) for bc in res.barcodes], np.sum(res.bad_read_length)
 
 #####
 def logfile_output(res, barcode):
@@ -277,6 +278,8 @@ def plot_number_of_reads_per_pID(res):
 # MAIN
 ######
 if __name__=='__main__':
+    if len(sys.argv)==2:
+        CONFIG_FILE_NAME = sys.argv[1]
     try:
         execfile(CONFIG_FILE_NAME)
     except:
diff --git a/src/p3_cluster_align.py b/src/p3_cluster_align.py
index dce60a4..1d00530 100755 (executable)
@@ -27,7 +27,7 @@ if(len(sys.argv)==2):
             os.system(cmd)
 
 else:
-    print auto_file_name+': usage: '+auto_file_name+' <directory of a run>'
+    print sys.argv[1]+': usage: '+auto_file_name+' <directory of a run>'
 
 
 
diff --git a/src/p4_consensus.py b/src/p4_consensus.py
index 04b88a6..86dcd01 100755 (executable)
@@ -17,7 +17,7 @@ import time
 import lib_tools as lt
 
 auto_file_name = str(sys.argv[0])
-
+verbose = 0
 ######
 # DEF FUNCTIONS
 ######
@@ -49,7 +49,7 @@ def make_consensus(file_name):
 
         return (str_consensus_2, np.sum(nb_reads_fwd), np.sum(nb_reads_rev))    
     else:
-        print 'file: '+file_name+': not enough reads (#reads < 3) to make consensus'
+        if verbose: print 'file: '+file_name+': not enough reads (#reads < 3) to make consensus'
         return ('nothing',0,0)
 #######
 
@@ -74,7 +74,6 @@ if __name__=='__main__':
                 print temp_directories
                 for temp_dir in temp_directories:
                     pID_files = glob.glob(temp_dir+'/*aligned.fasta')
-                    print pID_files
                     for pID_file in pID_files:
                         pID = lt.get_last_part_of_path(pID_file).split('_')[0]
                         with open(pID_file, 'r') as infile: