removed fastq, little changes in scripts (introduced glob, new consensus building)
author Richard <richard.neher@tuebingen.mpg.de>
Tue, 10 Sep 2013 16:31:05 +0000 (18:31 +0200)
committer Richard <richard.neher@tuebingen.mpg.de>
Tue, 10 Sep 2013 16:32:27 +0000 (18:32 +0200)
.gitignore
figures/2013-09-04-Run1_SW_PID_copy_number.pdf [new file with mode: 0644]
figures/2013-09-04-Run1_SW_matching_read_length.pdf [new file with mode: 0644]
src/configFile.py
src/p1_trim_and_filter.py
src/p2_sort.py
src/p3_align.py
src/p3_cluster_align_main.py
src/p4_consensus.py

index 34d430c..37a18f3 100644 (file)
@@ -6,7 +6,9 @@
 auto
 .~*
 *fasta
+*fastq
 region_.tex
+*txt
 
 ### /home/fabio/.gitignore-boilerplates/Global/vim.gitignore
 *tar.gz
diff --git a/figures/2013-09-04-Run1_SW_PID_copy_number.pdf b/figures/2013-09-04-Run1_SW_PID_copy_number.pdf
new file mode 100644 (file)
index 0000000..b09984a
Binary files /dev/null and b/figures/2013-09-04-Run1_SW_PID_copy_number.pdf differ
diff --git a/figures/2013-09-04-Run1_SW_matching_read_length.pdf b/figures/2013-09-04-Run1_SW_matching_read_length.pdf
new file mode 100644 (file)
index 0000000..fdf03ae
Binary files /dev/null and b/figures/2013-09-04-Run1_SW_matching_read_length.pdf differ
index 153ed86..3328f8c 100644 (file)
@@ -6,9 +6,10 @@ cfg={
     'p5_virus_match':'TGGCAGTCTAGCAGAAGAAG',
     'p3_virus_match':'CCTCAGGAGGGGACCCAG',
     'barcodes':['TACG','ACGT','CGTA','GTAC'],
-    'input_data_file':'nomatch_R_2013_08_06_18_00_08_user_BAL-31-influenza6-10HIV1-3_Auto_user_BAL-31-influenza6-10HIV1-3_55.fastq',
+    'input_data_file':'subsample_iontorrent.fastq',
     'reverse':False,
     'min_occ_same_pid':1,
-    'min_length_pid':8
+    'min_length_pid':8,
+    'barcode_length_range':range(3,5)
 }
 
index 8142e0f..c12f850 100755 (executable)
@@ -34,7 +34,10 @@ class struct_var_set:
         #if(CONFIG_FILE_NAME in sys.argv):
         try:
             execfile(CONFIG_FILE_NAME)
-            if ('prefix_date_and_id' in cfg.keys() and 'p5_virus_match' in cfg.keys() and 'p3_virus_match' in cfg.keys() and 'input_data_file' in cfg.keys() and 'barcodes' in cfg.keys() and 'reverse' in cfg.keys()):
+            if ('prefix_date_and_id' in cfg.keys() and 'p5_virus_match' in cfg.keys() 
+                and 'p3_virus_match' in cfg.keys() and 'input_data_file' in cfg.keys() 
+                and 'barcodes' in cfg.keys() and 'reverse' in cfg.keys()):
+
                 # MANDATORY variables in the configuration file:
                 self.prefix_date_and_id = cfg['prefix_date_and_id']
                 self.p5_virus_match = cfg['p5_virus_match']
@@ -61,7 +64,10 @@ class struct_var_set:
                     self.min_length_pid = cfg['min_length_pid']
                 else:
                     self.min_length_pid = 8
-
+                if 'barcode_length_range' in cfg:
+                    self.bc_length_range = cfg['barcode_length_range']
+                else:
+                    self.bc_length_range = set(map(len, self.barcodes))
                 # ADDITIONAL variables for trimming and filtering:
                 self.count = 0
                 
@@ -114,9 +120,6 @@ def retrieve_pID_barcode_and_sequence(res, fwd, scores, seq, seq_id):
         if (bc in res.barcodes):
             if not (pID in res.good_reads[bc].keys()):
                 res.good_reads[bc][pID] = []
-                #res.good_reads_2[bc][pID] = []
-            #res.good_reads[bc][pID].append([seq_id, fwd==True and 'fwd' or 'rev', scores[0][0][scores[0][4]:scores[1][3]]])
-            #print str(bc)+' '+str(pID)+' '+str([seq_id, fwd==True and 'fwd' or 'rev', scores[0][0][scores[0][4]:scores[1][3]]])
             
             len_align_5p = len(scores[0][0])
             pos_end_align_5p_plus_one = scores[0][4]
@@ -126,12 +129,8 @@ def retrieve_pID_barcode_and_sequence(res, fwd, scores, seq, seq_id):
             pos_end_seq = pos_start_align_3p 
 
             res.good_reads[bc][pID].append([seq_id, fwd==True and 'fwd' or 'rev', str(seq[pos_start_seq:pos_end_seq])])
-            #res.good_reads_2[bc][pID].append([seq_id, fwd==True and 'fwd' or 'rev', str(seq[pos_start_seq:pos_end_seq])])
-            #print str(bc)+' '+str(pID)+' '+str([seq_id, fwd==True and 'fwd' or 'rev', str(seq[pos_start_seq:pos_end_seq])])
-
-            #print str(bc)+' '+str(pID)+' '+str([seq_id, fwd==True and 'fwd' or 'rev', str(seq)])
-            
             res.good_read_length[bc][L] += 1
+
     else: # pID too small to be valid
         if(bc in res.barcodes):
             res.good_read_bad_pID_length[bc][L] += 1
@@ -151,10 +150,9 @@ def filter_reads(res):
             
             # try forward
             prim_scores = lt.check_match_SW(tmp_seq, res.match_seqs)
-            if (prim_scores[0][2] >= res.p5_cutoff and
-                prim_scores[0][3] >= 3 and
-                prim_scores[0][3] <= 4 and
-                prim_scores[1][2] >= res.p3_cutoff
+            if (prim_scores[0][2] >= res.p5_cutoff and   #check 5 prime match
+                prim_scores[0][3] in res.bc_length_range and              #check barcode length
+                prim_scores[1][2] >= res.p3_cutoff       #check 3 prime match
                 ):
                 retrieve_pID_barcode_and_sequence(res, True, prim_scores, tmp_seq, record.id)
             else:
@@ -163,8 +161,7 @@ def filter_reads(res):
                     tmp_seq = str(Seq(tmp_seq, generic_dna).reverse_complement())
                     prim_scores = lt.check_match_SW(tmp_seq, res.match_seqs)
                     if (prim_scores[0][2] >= res.p5_cutoff and
-                        prim_scores[0][3] >= 3 and
-                        prim_scores[0][3] <= 4 and
+                        prim_scores[0][3] in res.bc_length_range and             #check barcode length
                         prim_scores[1][2] >= res.p3_cutoff
                         ):
                         retrieve_pID_barcode_and_sequence(res, False, prim_scores, tmp_seq, record.id)
@@ -181,10 +178,7 @@ def filter_reads(res):
     time_end = time.time()
     print 'computing time: ' + str(time_end - time_start)
     
-    for bc in res.barcodes: # sorts the pIDs for each barcode
-        res.good_reads[bc].keys().sort
-
-    for bc in res.good_reads.keys():
+    for bc in sorted(res.good_reads.keys()):
         print "barcode " + str(bc) + ":"
         #print res.good_reads[bc]
         print "---#good_reads: " + str(len(res.good_reads[bc]))
@@ -192,63 +186,6 @@ def filter_reads(res):
     print 'Total:'
     print res.count, [len(res.good_reads[bc]) for bc in res.barcodes], [int(np.sum(res.good_read_bad_pID_length[bc])) for bc in res.barcodes]
 
-
-#####
-# def input_data_file_analysis(res):
-#     '''
-#     function that filters the good/bad reads from the input fastq data file
-#     res is the structure containing the filtering operation results
-#     '''
-#     with open(res.path_input_data_file, 'r') as seq_file:
-#         for record in SeqIO.parse(seq_file, "fastq"):
-#             tmp_seq = str(record.seq)
-#             L = len(tmp_seq)
-#             bc = tmp_seq[:4]
-#             if (bc in res.barcodes): #barcode recognition
-#                 # check distances of sample code, 5' and 3' primers
-#                 d1 = check_match_SW(tmp_seq, [res.match_seqs[0]])[0]
-#                 res.good_tag_only_read_length[bc][L] += 1
-#                 if (d1[2] > res.p5_cutoff and d1[3] == 4): # good match at 5' end
-#                     d2 = check_match_SW(tmp_seq, [res.match_seqs[1]])[0]
-#                     res.good_fwd_read_length[bc][L] += 1
-#                     if (d2[2] > res.p3_cutoff): # and d2[4] > 200 and d2[4] < len(d2[0]) - 9): # good match at 3' end
-#                         #record.id = read name : not necessary anymore
-#                         #d2[0][d2[4]:] = pID
-#                         #d2[0][4:d2[4]] = sequence
-#                         pID = d2[0][d2[4]:]
-#                         sequence = d2[0][4:d2[4]]
-                        
-#                         if(len(pID) >= res.min_length_pid):
-#                             #print "pID ok"
-#                             if not (pID in res.good_reads[bc].keys()):
-#                                 res.good_reads[bc][pID]=Counter([sequence])
-#                             else:
-#                                 res.good_reads[bc][pID].update([sequence])
-#                             res.good_read_length[bc][L] += 1
-#                         else:
-#                             #print "pID ko"
-#                             res.good_read_bad_pID_length[bc][L]+=1
-#                     else:
-#                         res.bad_read_length[L] += 1
-#                 else:
-#                     res.bad_read_length[L] += 1
-#             else:
-#                 res.bad_read_length[L] += 1
-#             res.count += 1
-#             #control
-#             if (res.count % 1000 == 0):
-#                 print res.count, [len(res.good_reads[bc]) for bc in res.barcodes]#, [np.sum(res.good_tag_only_read_length[bc]) for bc in res.barcodes], [np.sum(res.good_fwd_read_length[bc]) for bc in res.barcodes]
-#         print 'Total:'
-#         print res.count, [len(res.good_reads[bc]) for bc in res.barcodes], [int(np.sum(res.good_tag_only_read_length[bc])) for bc in res.barcodes], [int(np.sum(res.good_fwd_read_length[bc])) for bc in res.barcodes], [int(np.sum(res.good_read_bad_pID_length[bc])) for bc in res.barcodes]
-
-#         for bc in res.barcodes: # sorts the pIDs for each barcode
-#             res.good_reads[bc].keys().sort
-    
-#         for bc in res.barcodes:
-#             print "-----barcode: " + str(bc)
-#             for pID in res.good_reads[bc].keys():
-#                 print "--------sum for pID : " + str(pID)
-#                 print str(sum(res.good_reads[bc][pID].values()))
 #####
 def logfile_output(res, barcode):
     '''
@@ -274,15 +211,16 @@ def export_good_reads_to_fasta_file_for_consensus_compress(res, barcode, mosp):
     mosp min_occ_same_pid
     '''
     with open('../data/' + res.prefix_date_and_id + '_' + barcode + '_filtered-reads-SW-mosp-' + str(mosp) + '.fasta', 'w') as out_file:
-        for pID, count_seq in res.good_reads[barcode].iteritems():
+        for pID in sorted(res.good_reads[barcode].keys()):
+            count_seq = res.good_reads[barcode][pID]
             if len(count_seq) >= mosp: # write the reads corresponding to pID if this one appears at least mosp times
                 # count_seq = list of [id, 'fwd'/'rev', seq] corresponding to pID
-                dict_seq = defaultdict(Counter)
-                for seq_read in count_seq:
-                    dict_seq[seq_read[2]].update([seq_read[1]])
+                dict_seq = defaultdict(lambda:{'fwd':0, 'rev':0})
+                for seq_read in count_seq: # count forward and reverse reads
+                    dict_seq[seq_read[2]][seq_read[1]]+=1
                 for read_to_write in dict_seq.keys():
-                    nb_fwd = 'fwd' in dict_seq[read_to_write].keys() and dict_seq[read_to_write]['fwd'] or 0
-                    nb_rev = 'rev' in dict_seq[read_to_write].keys() and dict_seq[read_to_write]['rev'] or 0
+                    nb_fwd = dict_seq[read_to_write]['fwd']
+                    nb_rev = dict_seq[read_to_write]['rev']
                     out_file.write('>' + pID + '_' + str(nb_fwd) + '_' + str(nb_rev) + '\n')
                     out_file.write(read_to_write+'\n')
                 
@@ -328,42 +266,40 @@ def plot_number_of_reads_per_pID(res):
 ######
 # MAIN
 ######
-try:
-    execfile(CONFIG_FILE_NAME)
-except:
-    print "messed up config file"         
-    
-try:
-    res = struct_var_set() # initialisation of the structure that will contain the filtering operation results
-except:
-    print 'Problem for initialisation: there is something wrong with the configuration file ' + CONFIG_FILE_NAME + '!\n'
-
-####
-# Filter the data file
-####
-print '--analysis of the input data file: ' + res.input_data_file
-filter_reads(res)
-
-
-
-
-####
-# Export the log and fasta files
-####    
-for bc in res.barcodes:
-    print '--generating the log file for the barcode ' + str(bc)
-    logfile_output(res, bc)
-
-#for bc in res.barcodes:
-    print '--exporting the good reads (barcode: ' + str(bc) +') occurring with at least ' + str(res.min_occ_same_pid) + ' identical pIDs (with length >= ' + str(res.min_length_pid)  + ') to make consensus'
-    # generate a fasta file that contains only good reads with at least min_occ_same_pid identical pIDs to make consensus
-    export_good_reads_to_fasta_file_for_consensus_compress(res, bc, res.min_occ_same_pid)
-####
-# Plots
-####
-
-## read length distribution
-plot_read_length_distribution(res)
-
-## number of reads per pID
-plot_number_of_reads_per_pID(res)
+if __name__=='__main__':
+    try:
+        execfile(CONFIG_FILE_NAME)
+    except:
+        print "messed up config file"         
+
+    try:
+        res = struct_var_set() # initialisation of the structure that will contain the filtering operation results
+    except:
+        print 'Problem for initialisation: there is something wrong with the configuration file ' + CONFIG_FILE_NAME + '!\n'
+
+    ####
+    # Filter the data file
+    ####
+    print '--analysis of the input data file: ' + res.input_data_file
+    filter_reads(res)
+
+    ####
+    # Export the log and fasta files
+    ####    
+    for bc in res.barcodes:
+        print '--generating the log file for the barcode ' + str(bc)
+        logfile_output(res, bc)
+
+    #for bc in res.barcodes:
+        print '--exporting the good reads (barcode: ' + str(bc) +') occurring with at least ' + str(res.min_occ_same_pid) + ' identical pIDs (with length >= ' + str(res.min_length_pid)  + ') to make consensus'
+        # generate a fasta file that contains only good reads with at least min_occ_same_pid identical pIDs to make consensus
+        export_good_reads_to_fasta_file_for_consensus_compress(res, bc, res.min_occ_same_pid)
+    ####
+    # Plots
+    ####
+
+    ## read length distribution
+    plot_read_length_distribution(res)
+
+    ## number of reads per pID
+    plot_number_of_reads_per_pID(res)
index 7c15118..85b64af 100755 (executable)
@@ -45,6 +45,7 @@ if (len(sys.argv) == 2):
     relative_path_to_filtered_reads_file = str(sys.argv[1])
     path_to_data_file = "../data/"
     path_to_templates = "../templates/"
+    lt.check_and_create_directory(path_to_templates)
     filtered_reads_file_basename = relative_path_to_filtered_reads_file.split('/')[-1]
     dict_pIDs = defaultdict(list)
     
index e79b2ca..bd6d282 100755 (executable)
@@ -14,6 +14,7 @@ import os
 import sys
 import time
 import lib_tools as lt
+import glob
 
 auto_file_name = str(sys.argv[0])
 
@@ -31,25 +32,19 @@ if (len(sys.argv)==2):
     if relative_path_to_temp_directory[-1]!='/':
         relative_path_to_temp_directory+='/'
     path_to_templates='../templates/'
-    #path_to_input_dir = relative_path_to_temp_directory[:13] # "../templates/" 
-    #temp_directory_basename = input_directory_name[13:] # "../templates/" to remove
     temp_directory_basename = relative_path_to_temp_directory.split('/')[-2]
     print temp_directory_basename
     [prefix_date_and_id, bc]= [temp_directory_basename.split('_')[i] for i in [0,2]]
     prefix_date_and_id = prefix_date_and_id[len('dir-'):] # "dir-" to remove
-    #if bc[-1]=='/':
-    #    bc= bc[:-1] # "/" to remove
     print prefix_date_and_id, bc
+
     ####
     # collect the temp reads files to align for the given barcode
     ####
-    list_temp_files = os.popen('ls '+relative_path_to_temp_directory+'*').readlines()
-    list_temp_files = [list_temp_files[i].split('/')[-1].strip() for i in range(len(list_temp_files))]
+    list_temp_files = glob.glob(relative_path_to_temp_directory+'*')
+
     # list of "pIDs" to generate the pID alignment files 
-    list_of_pIDs = [list_temp_files[i].split('_')[3] for i in range(len(list_temp_files))]
-    # cut the extension ".fasta" of pIDs
-    for i in range(len(list_of_pIDs)):
-        list_of_pIDs[i] = list_of_pIDs[i][:-6]# length(".fasta")=6
+    list_of_pIDs = [fname.split('_')[-1].split('.')[0] for fname in list_temp_files]
     print 'list_of_pIDs created, length: ' + str(len(list_of_pIDs))
     set_of_pIDs = set(list_of_pIDs) 
     print 'no duplicates in pIDs list: '+str(len(set_of_pIDs)==len(list_of_pIDs))
@@ -59,9 +54,8 @@ if (len(sys.argv)==2):
     ####
     #create the directory for the aligned reads
     dir_align_name_bc = str(path_to_templates+'dir-'+prefix_date_and_id+'_align_'+bc)
-    #print dir_align_name_bc
     dir_temp_name_bc =  str(path_to_templates+'dir-'+prefix_date_and_id+'_temp_'+bc)
-    #print dir_temp_name_bc
+
     lt.check_and_create_directory(dir_align_name_bc)
     time_start = time.time()
     for pID in list_of_pIDs:
@@ -75,52 +69,7 @@ if (len(sys.argv)==2):
     time_end = time.time()
     print 'computation time: '+str(time_end-time_start)
 
-    #else:
-    #    print 'no file created'
-    
-    
-
 else:
     print auto_file_name+': usage: '+auto_file_name+' <temp directory containing temp reads files to align (in ../templates/)>'
 
 
-
-####
-#collect the list of specific barcode/pID temp files to align (files generated by pipeline2 in dir. templates)
-####
-# list_temp_files = os.popen('ls -l ../templates/'+prefix_date_and_id+'_temp_*').readlines()
-# list_temp_files = [list_temp_files[i].split(' ')[-1].split('\n')[0] for i in range(len(list_temp_files))]
-# # list of "triples" [date+id,barcode,pID] to generate the pID alignement files 
-# list_file_names = [[list_temp_files[i].split('_')[j] for j in [0,2,3]] for i in range(len(list_temp_files))]
-# # cut the extension ".fasta" of pIDs and the prefix "../templates/" to the date+id
-# for i in range(len(list_file_names)):
-#     list_file_names[i][2] = list_file_names[i][2][:-6]# length(".fasta")=6
-#     list_file_names[i][0] = list_file_names[i][0][13:]# length("../templates/")=13
-# print 'list_file_names created, length: ' + str(len(list_file_names))
-
-
-
-####
-#align the reads for each temp file
-####
-#for alignments per interval if parallelisation needed
-# if len(sys.argv)==3:
-#     first = int(sys.argv[1])
-#     last = int(sys.argv[2])
-# else:
-#     first=0
-#     last=len(list_file_names)
-# time_start = time.time()
-# for cur_file in list_file_names[first:last]:
-#     temp_file_name = '../templates/' + cur_file[0] + '_temp_' + cur_file[1] + '_' + cur_file[2] + '.fasta'
-#     align_file_name = '../templates/' + cur_file[0] + '_pID_' + cur_file[1] + '_' + cur_file[2] + '.fasta'
-#     print 'Alignment for file: ' + temp_file_name
-#     cline = MuscleCommandline(input = '../templates/' + cur_file[0] + '_temp_' + cur_file[1] + '_' + cur_file[2] + '.fasta', out = '../templates/' + cur_file[0] + '_pID_' + cur_file[1] + '_' + cur_file[2] + '.fasta')
-    
-    
-#     #
-#     cline()
-#     #
-# time_end = time.time()
-# print 'computation time: '+str(time_end-time_start)
-    
index 4a30802..21170ce 100755 (executable)
@@ -21,8 +21,7 @@ if(len(sys.argv)==2):
     cluster_dir_base_name = cluster_dir[len(path_to_dir):] # "../templates/" to remove
     
     #collect the list of temp reads files to align
-    list_temp_files = os.popen(str('ls '+cluster_dir+'*')).readlines()
-    list_temp_files = [file_to_strip.strip() for file_to_strip in list_temp_files]
+    list_temp_files = glob.glob(cluster_dir+'*')
     print list_temp_files
     #run one job per file to align on the cluster
     for cur_file in list_temp_files:
index 8dbc8ad..40c0a00 100755 (executable)
@@ -16,6 +16,7 @@ from collections import Counter
 from collections import defaultdict
 import os
 import sys
+import glob
 import time
 import datetime
 import lib_tools as lt
@@ -31,22 +32,26 @@ auto_file_name = str(sys.argv[0])
 def make_consensus(file_name, pID):
     #print 'consensus for file: '+file_name
     alignment = AlignIO.read(file_name,"fasta")
-    len_align = len(alignment)
-    nb_reads_fwd = sum([int(alignment[i].id.split('_')[1]) for i in range(len_align)])
+    nb_reads_fwd = np.asarray([int(seq.id.split('_')[1]) for seq in alignment], int)
     #print nb_reads_fwd
-    nb_reads_rev = sum([int(alignment[i].id.split('_')[2]) for i in range(len_align)])
+    nb_reads_rev = np.asarray([int(seq.id.split('_')[2]) for seq in alignment], int)
     #print nb_reads_rev
     nb_reads = nb_reads_fwd + nb_reads_rev
-    len_seq = alignment.get_alignment_length()
-    if(nb_reads >= 3):
+
+    alignment = np.asarray(alignment)
+    len_align,len_seq = alignment.shape
+    alpha = np.array(list('ATCG-N'))
+    nuc_counts = np.zeros((alpha.shape[0], len_seq))
+    if(np.sum(nb_reads) >= 3):
         #compute the consensus sequence taking into account the numbers of fwd/rev reads corresponding to the pID
-        
-        str_consensus = "".join([Counter("".join([alignment[i,j]*(int(alignment[i].id.split('_')[1])+int(alignment[i].id.split('_')[2])) for i in range(len_align)])).most_common(1)[0][0] for j in range(len_seq)])
+        for ni,nuc in enumerate(alpha):
+            nuc_counts[ni] = np.sum((alignment == nuc)*np.repeat(np.array([nb_reads]).T, len_seq, axis=1), axis=0)
+        str_consensus = "".join(alpha[np.argmax(nuc_counts, axis=0)])
         
         #remove gaps
-        str_consensus_2 = "".join([i!='-' and i or '' for i in str_consensus])
+        str_consensus_2 = "".join([nuc for nuc in str_consensus if nuc!='-'])
 
-        return (str_consensus_2, nb_reads_fwd, nb_reads_rev)    
+        return (str_consensus_2, np.sum(nb_reads_fwd), np.sum(nb_reads_rev))    
     else:
         print 'file: '+file_name+': not enough reads (#reads < 3) to make consensus'
         return ('nothing',0,0)
@@ -54,38 +59,37 @@ def make_consensus(file_name, pID):
 
 
 
+if __name__=='__main__':
+    if (len(sys.argv)==2):
+        # parse the input aligned files directory name
+        relative_path_to_align_dir=str(sys.argv[1])
+        if(relative_path_to_align_dir[-1]!='/'):
+            relative_path_to_align_dir+='/'
+        path_to_templates = "../templates/"
+        align_dir_basename = relative_path_to_align_dir.split('/')[2]
+        [prefix_date_and_id,file_type,barcode] = [align_dir_basename.split('dir-')[1].split('_')[i] for i in [0,1,2]]
 
-if (len(sys.argv)==2):
-    # parse the input aligned files directory name
-    relative_path_to_align_dir=str(sys.argv[1])
-    if(relative_path_to_align_dir[-1]!='/'):
-        relative_path_to_align_dir+='/'
-    path_to_templates = "../templates/"
-    align_dir_basename = relative_path_to_align_dir.split('/')[2]
-    [prefix_date_and_id,file_type,barcode] = [align_dir_basename.split('dir-')[1].split('_')[i] for i in [0,1,2]]
-
-    # create (if necessary) the consensus directory
-    lt.check_and_create_directory(str(path_to_templates+'dir-'+prefix_date_and_id+'_consensus'))
-    
-    # get the list of files to consensus (one per pID)
-    list_files_to_consensus = os.popen(str('ls '+relative_path_to_align_dir+'*')).readlines()
-    dict_consensus_seq = {}
-    for cur_file in list_files_to_consensus:
-        cur_file = cur_file.strip() # remove '\n'
-        cur_file_pID = cur_file.split('_')[-1].split('.')[0] #"<prefix_date_and_id>_align_<barcode>_<pID>.fasta"
-        cur_file_barcode = cur_file.split('_')[2]
-        #print cur_file
-        result_consensus_cur_file = make_consensus(cur_file,cur_file_pID)
-        if (result_consensus_cur_file != ('nothing',0,0)): # store the consensus sequence
-            dict_consensus_seq[cur_file_pID]=result_consensus_cur_file
-        #print result_consensus_cur_file
-    # write the consensus sequences of each pID (with #occ >= 3) in the specific consensus seq file for the given barcode
-    with open(str(path_to_templates+'dir-'+prefix_date_and_id+'_consensus/'+prefix_date_and_id+'_consensus_'+barcode+'.fasta'),'w') as out_file:
-        for pID in sorted(dict_consensus_seq.keys()):
-            out_file.write('>'+pID+'_'+str(dict_consensus_seq[pID][1])+'_'+str(dict_consensus_seq[pID][2])+'\n')
-            out_file.write(dict_consensus_seq[pID][0]+'\n')
-        
-    
-else:
-    print auto_file_name+': usage: '+auto_file_name+' <aligned reads files directory (in ../templates/)>'
+        # create (if necessary) the consensus directory
+        lt.check_and_create_directory(str(path_to_templates+'dir-'+prefix_date_and_id+'_consensus'))
+
+        # get the list of files to consensus (one per pID)
+        list_files_to_consensus = glob.glob(relative_path_to_align_dir+'*')
+        dict_consensus_seq = {}
+        for cur_file in list_files_to_consensus:
+            cur_file_pID = cur_file.split('_')[-1].split('.')[0] #"<prefix_date_and_id>_align_<barcode>_<pID>.fasta"
+            cur_file_barcode = cur_file.split('_')[2]
+            #print cur_file
+            result_consensus_cur_file = make_consensus(cur_file,cur_file_pID)
+            if (result_consensus_cur_file != ('nothing',0,0)): # store the consensus sequence
+                dict_consensus_seq[cur_file_pID]=result_consensus_cur_file
+            #print result_consensus_cur_file
+        # write the consensus sequences of each pID (with #occ >= 3) in the specific consensus seq file for the given barcode
+        with open(str(path_to_templates+'dir-'+prefix_date_and_id+'_consensus/'+prefix_date_and_id+'_consensus_'+barcode+'.fasta'),'w') as out_file:
+            for pID in sorted(dict_consensus_seq.keys()):
+                out_file.write('>'+pID+'_'+str(dict_consensus_seq[pID][1])+'_'+str(dict_consensus_seq[pID][2])+'\n')
+                out_file.write(dict_consensus_seq[pID][0]+'\n')
+
+
+    else:
+        print auto_file_name+': usage: '+auto_file_name+' <aligned reads files directory (in ../templates/)>'