continued streamlining
authorRichard Neher <richard@ag-neher-xps13.(none)>
Thu, 19 Sep 2013 11:42:42 +0000 (13:42 +0200)
committerRichard Neher <richard@ag-neher-xps13.(none)>
Thu, 19 Sep 2013 11:42:42 +0000 (13:42 +0200)
src/p1_trim_and_filter.py
src/p2_sort.py
src/p4_consensus.py
src/p6_align_global_per_barcode.py
src/p7_detect_mutants_indels.py

index aa06f02..d5914c3 100755 (executable)
@@ -194,7 +194,9 @@ def logfile_output(res, barcode):
     barcode
     '''
     lt.check_and_create_directory(res.runid)
-    with  open(res.runid+'/bc_'+barcode+'_summary.txt', 'w') as log_file:
+    barcode_dir  = res.runid+'/bc_'+barcode+'_analysis'
+    lt.check_and_create_directory(barcode_dir)
+    with  open(barcode_dir+'/filter_summary.txt', 'w') as log_file:
         log_file.write('Barcode: ' + str(barcode) + '\n')
         log_file.write('Total number of reads (all barcodes): ' + str(res.count) + '\n')
         log_file.write('Number of pIDs: ' + str(len(res.good_reads[bc])) + '\n')
@@ -215,7 +217,9 @@ def export_good_reads_to_fasta_file_for_consensus_compress(res, barcode, mosp):
     # make directory for this run
     ####
     lt.check_and_create_directory(res.runid)
-    with open(res.runid+'/bc_'+barcode+'_min_'+str(mosp)+'.fasta', 'w') as out_file:
+    barcode_dir  = res.runid+'/bc_'+barcode+'_analysis'
+    lt.check_and_create_directory(barcode_dir)
+    with open(barcode_dir+'/filtered_reads.fasta', 'w') as out_file:
         for pID in sorted(res.good_reads[barcode].keys()):
             count_seq = res.good_reads[barcode][pID]
             if len(count_seq) >= mosp: # write the reads corresponding to pID if this one appears at least mosp times
index b716deb..fb3a3ef 100755 (executable)
@@ -39,51 +39,45 @@ batchsize = 10
 ######
 # MAIN
 ######
-if (len(sys.argv) > 1):
-    relative_path_to_filtered_reads_file = str(sys.argv[1])
-    if len(sys.argv)>2:
-        analysis_type = '_'+sys.argv[2]
-    else:
-        analysis_type = ''
-    filtered_reads_file = lt.get_last_part_of_path(relative_path_to_filtered_reads_file)
-    path_base = lt.get_base_of_path(relative_path_to_filtered_reads_file)
-    barcode = filtered_reads_file.split('_')[1]
+if (len(sys.argv) ==3):
+    rundir = str(sys.argv[1]).rstrip('/')+'/'
+    readtype = sys.argv[2]
 
-    #create the directory for the given barcode
-    analysis_dir = path_base+'/bc_'+barcode+'_analysis'+analysis_type
-    lt.check_and_create_directory(analysis_dir)
+    bc_dir_list = glob.glob(rundir+'bc_*_analysis')
+    for bc_dir in bc_dir_list:
+        reads_file = bc_dir.rstrip('/')+'/'+readtype+'_reads.fasta'
 
-    # create directory for batch 0
-    batch=0
-    temp_pid_files_dir =analysis_dir+'/temp_'+"{0:04d}".format(batch)
-    lt.check_and_create_directory(temp_pid_files_dir)
-    dict_pIDs = defaultdict(list)
+        # create directory for batch 0
+        batch=0
+        temp_pid_files_dir = bc_dir+'/temp_'+readtype+'_'+"{0:04d}".format(batch)
+        lt.check_and_create_directory(temp_pid_files_dir)
+        dict_pIDs = defaultdict(list)
     
     #import the file containing, for the given barcode, the sequences for each pID
     #generate the pID specific temp files for alignments
-    with open(relative_path_to_filtered_reads_file, 'r') as input_file:
-        count=0
-        for record in SeqIO.parse(input_file, 'fasta'):
-            pID = str(record.id.split('_')[0])
-            dict_pIDs[pID].append((record.id,record.seq))
-
-        all_pIDs = sorted(dict_pIDs.keys())
-        for pii, pID in enumerate(all_pIDs):
-            # write the temp files for each pID in the corresponding barcode directory
-            with open(temp_pid_files_dir+'/'+ pID+'.fasta', 'w') as output_pID_file:
-                for read in dict_pIDs[pID]:
-                    output_pID_file.write(str('>'+read[0]+'\n'))
-                    output_pID_file.write(str(read[1]+'\n'))
-                    count+=1
-                    if(count%500==0):
+        with open(reads_file, 'r') as input_file:
+            count=0
+            for record in SeqIO.parse(input_file, 'fasta'):
+                pID = str(record.id.split('_')[0])
+                dict_pIDs[pID].append((record.id,record.seq))
+                
+                all_pIDs = sorted(dict_pIDs.keys())
+                for pii, pID in enumerate(all_pIDs):
+                    # write the temp files for each pID in the corresponding barcode directory
+                    with open(temp_pid_files_dir+'/'+ pID+'.fasta', 'w') as output_pID_file:
+                        for read in dict_pIDs[pID]:
+                            output_pID_file.write(str('>'+read[0]+'\n'))
+                            output_pID_file.write(str(read[1]+'\n'))
+                            count+=1
+                            if(count%500==0):
                         print 'count = ' + str(count)
-            if ((batch+1)*batchsize<pii):
-                batch+=1
-                temp_pid_files_dir =analysis_dir+'/temp_'+"{0:04d}".format(batch)
-                lt.check_and_create_directory(temp_pid_files_dir)
-
-        print 'total : ' + str(count)
+                        if ((batch+1)*batchsize<pii):
+                            batch+=1
+                            temp_pid_files_dir = bc_dir+'/temp_'+readtype+'_'+"{0:04d}".format(batch)
+                            lt.check_and_create_directory(temp_pid_files_dir)
+                            
+                            print 'total : ' + str(count)
         #else:
         #    print 'no file created'
 else: 
-    print auto_file_name+': usage: '+auto_file_name+' <filtered reads file (in ../data/)>'
+    print auto_file_name+': usage: '+auto_file_name+' <run directory> <readtype>'
index d7cb815..f2ef9ca 100755 (executable)
@@ -55,17 +55,14 @@ def make_consensus(file_name):
 
 
 if __name__=='__main__':
-    if(len(sys.argv)>1):
+    if(len(sys.argv)==3):
         # parse the input consensus sequences file name
         analysis_dir=str(sys.argv[1]).rstrip('/')+'/'
-        if len(sys.argv)>2:
-            analysis_type = '_'+sys.argv[2]
-        else:
-            analysis_type = ''
+        readtype = '_'+sys.argv[2]
         
-        temp_directories = glob.glob(analysis_dir+'temp_*')
-        consensus_fname = analysis_dir+'consensus_sequences'+analysis_type+'.fasta'
-        aligned_reads_fname = analysis_dir+'aligned_reads'+analysis_type+'.fasta'
+        temp_directories = glob.glob(analysis_dir+'temp'+readtype+'_*')
+        consensus_fname = analysis_dir+'consensus_sequences'+readtype+'.fasta'
+        aligned_reads_fname = analysis_dir+'aligned_reads'+readtype+'.fasta'
 
         with open(consensus_fname, 'w') as consensus_file, \
                 open(aligned_reads_fname, 'w') as aligned_reads_file:
index 05bd2b7..2fe6577 100755 (executable)
@@ -28,27 +28,18 @@ if(len(sys.argv)>1):
     if len(sys.argv)>2:
         analysis_type = '_'+sys.argv[2]
 
-    path_to_templates = "../templates/"
-    cons_seq_file_basename = lt.get_last_part_of_path(relative_path_to_cons_seq_file)
-    
-    [prefix_date_and_id,file_type,barcode] = [lt.trim_extension(cons_seq_file_basename).split('_')[i] for i in [0,1,2]]
-
-    # create (if necessary) the consensus directory
-    outpath = path_to_templates+'dir-'+prefix_date_and_id+'_align-global'
-    lt.check_and_create_directory(outpath)
-    
-    # align
-    aligned_cons_seq_file_name = outpath+'/'+prefix_date_and_id+'_align-global_'+barcode+'.fasta'
-    print 'Global consensus sequences alignment for barcode: ' + barcode + ' (from file: ' + relative_path_to_cons_seq_file + ' )'
-
-    cline = MuscleCommandline(input = relative_path_to_cons_seq_file, out = aligned_cons_seq_file_name)
-    
-    time_start = time.time()
-    #
-    cline()
-    #
-    time_end = time.time()
-    print 'alignment computation time: ' + str(time_end - time_start)
+    barcode_directories = glob.glob(rundir+'bc_*_analysis*')
+    for dname in barcode_directories:
+        consensus_file_list = glob.glob(dname+'/consensus*fasta')
+        time_start = time.time()
+        for fname in consensus_file_list:
+            aligned_fname = lt.trim_extension(fname)+'_aligned.fasta'
+
+            cline = MuscleCommandline(input = fname, out = aligned_fname)
+            cline()
+
+        time_end = time.time()
+        print 'alignment computation time: ' + str(time_end - time_start)
 
 else:
-    print auto_file_name + ': usage: '+ auto_file_name + ' <consensus sequences file (in ../templates/dir-<date_and_id>_consensus)>'
+    print auto_file_name + ': usage: '+ auto_file_name + ' <directory containing the directories for each barcode containing the consensus sequence files>'
index c0ce445..9cd7801 100755 (executable)
@@ -47,171 +47,157 @@ min_pid_alignment_score = 8.4
 
 if (len(sys.argv) == 3):
     # parse the input data files names
-    relative_path_to_filtered_reads_file = str(sys.argv[1])
-    relative_path_to_cons_seq_file = str(sys.argv[2])
-    
-    path_to_data = '../data/'
-    path_to_templates = '../templates/'
-    
-    filtered_reads_file_basename = lt.get_last_part_of_path(relative_path_to_filtered_reads_file)
-    cons_seq_file_basename = lt.get_last_part_of_path(relative_path_to_cons_seq_file)
-
-    [prefix_date_and_id, barcode] = [lt.trim_extension(filtered_reads_file_basename).split('_')[i] for i in [0,1]]
-    [prefix_date_and_id_cs, barcode_cs] = [lt.trim_extension(cons_seq_file_basename).split('_')[i] for i in [0,2]]
-
-    # check the similarity
-    if ((barcode == barcode_cs) and (prefix_date_and_id == prefix_date_and_id_cs)):
+    barcode_dir = str(sys.argv[1]).rstrip('/')+'/'
+    readtype = sys.argv[2]
       
-        # 1. Create the dictionary of pIDs (from the output files of p1_trim_and_filter)
-        
-        ###################################
-        # -- READ THE FILTERED READS FILE #
-        ###################################
-        # dictionary containing the list of reads (values) associated to a pID (keys)
-        dict_all_reads, count_reads_added_to_dict_all_reads, count_reads_with_invalid_pID = lt.parse_readfile(relative_path_to_filtered_reads_file)
-        # control prints
-        ####
-        print 'count_reads_with_invalid_pID: '+str(count_reads_with_invalid_pID)
-        print 'count_reads_added_to_dict_all_reads: '+str(count_reads_added_to_dict_all_reads)
-        print '#pIDs different: '+str(len(dict_all_reads.keys()))
-        
-
-        pID_abundance = {}
-            
-        for pID,reads in dict_all_reads.iteritems():
-            pID_abundance[pID] = sum([x[0]+x[1] for x in reads])
-
-        print '#pIDs occurring only once: ' + str([x==1 for x in pID_abundance.values()])
-        print '#pIDs occurring only twice: ' + str([x==2 for x in pID_abundance.values()])
-        print '#pIDs occurring at least 3 times: ' + str([x>2 for x in pID_abundance.values()])
-        ####
-
-        
-        ########################################
-        # -- READ THE CONSENSUS SEQUENCES FILE #
-        ########################################
-        # dictionary containing the consensus sequence (values) associated to a pID (keys) if it occurs at least 3 times
-        dict_cons_seq, a, b = lt.parse_readfile(relative_path_to_cons_seq_file)
-        print 'From consensus seq. file :'
-        print '#pIDs occurring at least 3 times: ' + str(len(dict_cons_seq.keys()))
-        
-
-        ###########################
-        # -- SEARCH THE NEIGHBORS #
-        ###########################
-        # dictionary indicating the "neighbor" state of each pID
-        dict_neighbor_state_pIDs = {}
-        for pID in dict_all_reads.keys():
-            dict_neighbor_state_pIDs[pID] = False
-        
-        list_occ_pIDs_dec = sorted(pID_abundance.items(), key = lambda x:x[0], reverse=True)
-        list_occ_pIDs_inc = list_occ_pIDs_dec[::-1]
-
-        print 'Check list_occ_pIDs_dec and list_occ_pIDs_inc lengths (should be equal): '        
-        print len(list_occ_pIDs_dec), len(list_occ_pIDs_inc)
-         
-        # dictionary containing the parent pID (values) of a given pID (keys)
-        dict_neighbor_of = defaultdict(list)
-        dict_neighbors = defaultdict(list)
-        
-        lim_pID_a = [list_occ_pIDs_inc[i][0]>=MIN_OCC_MOST_ABUND_PID for i in range(len(list_occ_pIDs_inc))].count(True)
-        print 'lim_pID_a:'+str(lim_pID_a)
-        print 'nb occ pID_a at pos lim_pID_a-1 in list_occ_pIDs_dec: '+str(list_occ_pIDs_dec[lim_pID_a-1][0])
-        print 'nb occ pID_a at pos lim_pID_a in list_occ_pIDs_dec: '+str(list_occ_pIDs_dec[lim_pID_a][0])
-
-        for ii_a, pID_a in enumerate(list_occ_pIDs_dec[:lim_pID_a]):
-            # this condition should always evaluate to true
-            if dict_neighbor_state_pIDs[pID_a[1]] == False:
-                continue
-            # loop over rare pid
-            for ii,pID_r in enumerate(list_occ_pIDs_inc[:-(ii_a+1)]):
-                # check whether this PID is already a neighbor of some other
-                 if((dict_neighbor_state_pIDs[pID_r[1]] == False) and (pID_r[0] < pID_a[0])):
-                    pIDs_align_score = align.localms(pID_a[1], pID_r[1], 1, 0, -0.6, -0.3)
-                    if (len(pIDs_align_score) != 0): # if pID_a[1] and pID_r[1] can be aligned
-                        if (pIDs_align_score[0][2] >= min_pid_alignment_score):
-                            print 'pID_a: '+str(pID_a)+', pID_r: '+str(pID_r)
-                            print pIDs_align_score[0][0], pIDs_align_score[0][1]
-                            print dict_neighbor_state_pIDs[pID_a[1]], dict_neighbor_state_pIDs[pID_r[1]]
-                            print "possible neighbors"
-
-                            if pID_r[0]>2: 
-                                neighbor_candidates = dict_cons_seq[pID_r[1]]
-                            else:
-                                neighbor_candidates = dict_all_reads[pID_r[1]]
-
-                                                                   
-                            if (pID_r[0] != 2): # pID_r occurs once or more than twice: only one sequence comparison to do
-                                seq_a = dict_cons_seq[pID_a[1]][0][2] 
-                                if(pID_r[0] == 1): # pID_r occurs once: its sequence is in dict_all_reads
-                                    # seq_r = dict_all_reads[pID_r[0]][0][2]
-                                    seq_r = dict_all_reads[pID_r[1]][0][2] # [nb_fwd,nb_rev,seq]
-                                else: # pID_r occurs at least 3 times: its consensus sequence is in dict_cons_seq
-                                    seq_r = dict_cons_seq[pID_r[1]][0][2] # [nb_fwd,nb_rev,seq]
-
-                            for nf, nr, seq_r in neighbor_candidates:
-                                if lt.check_neighbor_plausibility(seq_a, seq_r, DIST_MAX):
-                                    dict_neighbors[pID_a[1]].append(pID_r[1])
-                                    dict_neighbor_of[pID_r[1]] = pID_a[1]
-                                    dict_neighbor_state_pIDs[pID_r[1]] = True
-                                    print "NEIGHBORS !"
-                                    break
-                
-        
-        print 'neighbors:'
-        nb_neighbors_stated = 0
-        for pID in dict_neighbor_state_pIDs.keys():
-            if dict_neighbor_state_pIDs[pID]==True:
-                nb_neighbors_stated += 1
-                print pID
-
-        
-        
-        ##############################################
-        # -- WRITE THE NEIGHBORS FILTERED READS FILE #
-        ##############################################
-        count_reads_written = 0
-        count_neighbors_reads_written = 0
-        
-        #with open(filtered_reads_file_name[:-3]+'_neighbors_indels.fa', 'w') as neighbors_filtered_reads_file:
-        # write the new filtered reads file (mutant-indels):
-        filtered_reads_file_basename_suffix = filtered_reads_file_basename.split('_')[-1].split('.')[0]
-        with open(str(path_to_data+prefix_date_and_id+'-mutant-indels'+'_'+barcode+'_'+filtered_reads_file_basename_suffix+'.fasta'), 'w') as neighbors_filtered_reads_file:
-            for pID in dict_all_reads.keys(): # for all pIDs                                
-                if not dict_neighbor_state_pIDs[pID]: # if pID "is not a neighbor"
-                    print 'write reads for pID: ' + pID
-
-                    # write the reads corresponding to pID
-                    for cur_read in dict_all_reads[pID]: # cur_read is a list [nb_occ_fwd,nb_occ_rev,seq]
-                        pid_orig = pID
-                        mut_flag = ''
-                        new_read_id = str(pID+'_'+str(cur_read[0])+'_'+str(cur_read[1])+'_'+pid_orig+mut_flag)
-                        neighbors_filtered_reads_file.write('>'+new_read_id+'\n') # write the new id of the read
-                        neighbors_filtered_reads_file.write(cur_read[2]+'\n') # write the sequence
-                        count_reads_written += (cur_read[0]+cur_read[1])
-
-                    # write the neighbors if there are
-                    if (len(dict_neighbors[pID]) > 0):
-                        print '#neighbors pIDs: ' + str(len(dict_neighbors[pID]))
-                        for pID_n in dict_neighbors[pID]: # for all the neighbors pID_n of pID
-                            # write the neighbors pID_n
-                            for read_n in dict_all_reads[pID_n]: # read_n is a list [nb_occ_fwd,nb_occ_rev,seq]
-                                pid_orig = pID_n
-                                mut_flag = '_m'
-                                new_read_id = str(pID+'_'+str(read_n[0])+'_'+str(read_n[1])+'_'+pid_orig+mut_flag)
-                                neighbors_filtered_reads_file.write('>'+new_read_id+'\n') # write the new id of the read
-                                neighbors_filtered_reads_file.write(read_n[2]+'\n') # write the sequence
-                                count_reads_written += (read_n[0]+read_n[1])
-                                count_neighbors_reads_written += (read_n[0]+read_n[1])
-
-
-        print 'nb reads written: '+str(count_reads_written)
-        print 'nb neighbored reads written: '+str(count_neighbors_reads_written)
-        print 'nb neighbored pIDs: '+str(nb_neighbors_stated)
-
-        
-    else:
-        print 'PB: "barcode" and "prefix_date_and_id" extracted from input filtered reads file and consensus sequences file are different, Stop here!'
+    # 1. Create the dictionary of pIDs (from the output files of p1_trim_and_filter)
+
+    ###################################
+    # -- READ THE FILTERED READS FILE #
+    ###################################
+    # dictionary containing the list of reads (values) associated to a pID (keys)
+    dict_all_reads, count_reads_added_to_dict_all_reads, count_reads_with_invalid_pID = lt.parse_readfile(barcode_dir+'aligned_reads_'+readtype+'.fasta')
+    # control prints
+    ####
+    print 'count_reads_with_invalid_pID: '+str(count_reads_with_invalid_pID)
+    print 'count_reads_added_to_dict_all_reads: '+str(count_reads_added_to_dict_all_reads)
+    print '#pIDs different: '+str(len(dict_all_reads.keys()))
+
+
+    pID_abundance = {}
+
+    for pID,reads in dict_all_reads.iteritems():
+        pID_abundance[pID] = sum([x[0]+x[1] for x in reads])
+
+    print '#pIDs occurring only once: ' + str([x==1 for x in pID_abundance.values()])
+    print '#pIDs occurring only twice: ' + str([x==2 for x in pID_abundance.values()])
+    print '#pIDs occurring at least 3 times: ' + str([x>2 for x in pID_abundance.values()])
+    ####
+
+
+    ########################################
+    # -- READ THE CONSENSUS SEQUENCES FILE #
+    ########################################
+    # dictionary containing the consensus sequence (values) associated to a pID (keys) if it occurs at least 3 times
+    dict_cons_seq, a, b = lt.parse_readfile(barcode_dir+'consensus_sequences_'+readtype+'.fasta')
+    print 'From consensus seq. file :'
+    print '#pIDs occurring at least 3 times: ' + str(len(dict_cons_seq.keys()))
+
+
+    ###########################
+    # -- SEARCH THE NEIGHBORS #
+    ###########################
+    # dictionary indicating the "neighbor" state of each pID
+    dict_neighbor_state_pIDs = {}
+    for pID in dict_all_reads.keys():
+        dict_neighbor_state_pIDs[pID] = False
+
+    list_occ_pIDs_dec = sorted([(occ, pid) for pid, occ in pID_abundance.items()], reverse=True)
+    list_occ_pIDs_inc = list_occ_pIDs_dec[::-1]
+
+    print 'Check list_occ_pIDs_dec and list_occ_pIDs_inc lengths (should be equal): '        
+    print len(list_occ_pIDs_dec), len(list_occ_pIDs_inc)
+
+    # dictionary containing the parent pID (values) of a given pID (keys)
+    dict_neighbor_of = defaultdict(list)
+    dict_neighbors = defaultdict(list)
+
+    lim_pID_a = [list_occ_pIDs_inc[i][0]>=MIN_OCC_MOST_ABUND_PID for i in range(len(list_occ_pIDs_inc))].count(True)
+    print 'lim_pID_a:'+str(lim_pID_a)
+    print 'nb occ pID_a at pos lim_pID_a-1 in list_occ_pIDs_dec: '+str(list_occ_pIDs_dec[lim_pID_a-1][0])
+    print 'nb occ pID_a at pos lim_pID_a in list_occ_pIDs_dec: '+str(list_occ_pIDs_dec[lim_pID_a][0])
+
+    for ii_a, pID_a in enumerate(list_occ_pIDs_dec[:lim_pID_a]):
+        # this condition should always evaluate to true
+        if dict_neighbor_state_pIDs[pID_a[1]] == True:
+            continue
+        # loop over rare pid
+        for ii,pID_r in enumerate(list_occ_pIDs_inc[:-(ii_a+1)]):
+            # check whether this PID is already a neighbor of some other
+             if((dict_neighbor_state_pIDs[pID_r[1]] == False) and (pID_r[0] < pID_a[0])):
+                pIDs_align_score = align.localms(pID_a[1], pID_r[1], 1, 0, -0.6, -0.3)
+                if (len(pIDs_align_score) != 0): # if pID_a[1] and pID_r[1] can be aligned
+                    if (pIDs_align_score[0][2] >= min_pid_alignment_score):
+                        print 'pID_a: '+str(pID_a)+', pID_r: '+str(pID_r)
+                        print pIDs_align_score[0][0], pIDs_align_score[0][1]
+                        print dict_neighbor_state_pIDs[pID_a[1]], dict_neighbor_state_pIDs[pID_r[1]]
+                        print "possible neighbors"
+
+                        if pID_r[0]>2: 
+                            neighbor_candidates = dict_cons_seq[pID_r[1]]
+                        else:
+                            neighbor_candidates = dict_all_reads[pID_r[1]]
+
+
+                        if (pID_r[0] != 2): # pID_r occurs once or more than twice: only one sequence comparison to do
+                            seq_a = dict_cons_seq[pID_a[1]][0][2] 
+                            if(pID_r[0] == 1): # pID_r occurs once: its sequence is in dict_all_reads
+                                # seq_r = dict_all_reads[pID_r[0]][0][2]
+                                seq_r = dict_all_reads[pID_r[1]][0][2] # [nb_fwd,nb_rev,seq]
+                            else: # pID_r occurs at least 3 times: its consensus sequence is in dict_cons_seq
+                                seq_r = dict_cons_seq[pID_r[1]][0][2] # [nb_fwd,nb_rev,seq]
+
+                        for nf, nr, seq_r in neighbor_candidates:
+                            if lt.check_neighbor_plausibility(seq_a, seq_r, DIST_MAX):
+                                dict_neighbors[pID_a[1]].append(pID_r[1])
+                                dict_neighbor_of[pID_r[1]] = pID_a[1]
+                                dict_neighbor_state_pIDs[pID_r[1]] = True
+                                print "NEIGHBORS !"
+                                break
+
+
+    print 'neighbors:'
+    nb_neighbors_stated = 0
+    for pID in dict_neighbor_state_pIDs.keys():
+        if dict_neighbor_state_pIDs[pID]==True:
+            nb_neighbors_stated += 1
+            print pID
+
+
+
+    ##############################################
+    # -- WRITE THE NEIGHBORS FILTERED READS FILE #
+    ##############################################
+    count_reads_written = 0
+    count_neighbors_reads_written = 0
+
+    #with open(filtered_reads_file_name[:-3]+'_neighbors_indels.fa', 'w') as neighbors_filtered_reads_file:
+    # write the new filtered reads file (mutant-indels):
+    corrected_aligned_reads_fname = barcode_dir+'corrected_reads.fasta'
+    with open(corrected_aligned_reads_fname, 'w') as neighbors_filtered_reads_file:
+        for pID in dict_all_reads.keys(): # for all pIDs                                
+            if not dict_neighbor_state_pIDs[pID]: # if pID "is not a neighbor"
+                print 'write reads for pID: ' + pID
+
+                # write the reads corresponding to pID
+                for cur_read in dict_all_reads[pID]: # cur_read is a list [nb_occ_fwd,nb_occ_rev,seq]
+                    pid_orig = pID
+                    mut_flag = ''
+                    new_read_id = str(pID+'_'+str(cur_read[0])+'_'+str(cur_read[1])+'_'+pid_orig+mut_flag)
+                    neighbors_filtered_reads_file.write('>'+new_read_id+'\n') # write the new id of the read
+                    neighbors_filtered_reads_file.write(cur_read[2]+'\n') # write the sequence
+                    count_reads_written += (cur_read[0]+cur_read[1])
+
+                # write the neighbors if there are
+                if (len(dict_neighbors[pID]) > 0):
+                    print '#neighbors pIDs: ' + str(len(dict_neighbors[pID]))
+                    for pID_n in dict_neighbors[pID]: # for all the neighbors pID_n of pID
+
+                        # write the neighbors pID_n
+                        for read_n in dict_all_reads[pID_n]: # read_n is a list [nb_occ_fwd,nb_occ_rev,seq]
+                            pid_orig = pID_n
+                            mut_flag = '_m'
+                            new_read_id = str(pID+'_'+str(read_n[0])+'_'+str(read_n[1])+'_'+pid_orig+mut_flag)
+                            neighbors_filtered_reads_file.write('>'+new_read_id+'\n') # write the new id of the read
+                            neighbors_filtered_reads_file.write(read_n[2]+'\n') # write the sequence
+                            count_reads_written += (read_n[0]+read_n[1])
+                            count_neighbors_reads_written += (read_n[0]+read_n[1])
+
+
+    print 'nb reads written: '+str(count_reads_written)
+    print 'nb neighbored reads written: '+str(count_neighbors_reads_written)
+    print 'nb neighbored pIDs: '+str(nb_neighbors_stated)
+
+
 else:
-    print auto_file_name + ': usage: '+ auto_file_name + ' <filtered reads file (in ../data/)> <cons. seq. file (in ../templates/dir-<date_and_id>_consensus/)>'
+    print auto_file_name + ': usage: '+ auto_file_name + ' <directory with reads from a barcode>'