6d97210ce762ce454399430191cac946d9bf68a8
[pid.git] / src / p3_cluster_bis_clean_logs_and_move_back_aligned_files.py
1 #!/usr/bin/python
2
3 # WARNING: this script must be run ONLY after all the cluster jobs have finished.
4 # - it cleans the cluster logs in the src directory
5 # - it moves the temp and aligned-read files out of the cluster-specific directory (in ../templates) given as argument
6 #   into their corresponding barcode-specific temp and align directories in ../templates
7
8 import os
9 import sys
10 import time
11 import glob
12 from collections import Counter
13 from collections import defaultdict
14 import lib_tools as lt
15
auto_file_name = str(sys.argv[0])

# Directory under which the per-(type, barcode) target directories are created.
PATH_TO_TEMPLATES = "../templates/"


######

def cluster_prefix(cluster_dir):
    """Return the text following 'cluster-' in the last component of cluster_dir.

    The last path component is used regardless of how the path is written
    (with or without a trailing '/', relative or absolute), instead of
    relying on a fixed position in the '/'-split path.

    Returns None when the directory name does not contain 'cluster-'
    (this includes an empty cluster_dir).
    """
    dir_name = os.path.basename(os.path.normpath(cluster_dir))
    pieces = dir_name.split('cluster-')
    if len(pieces) < 2:
        return None
    return pieces[1]


def parse_type_and_barcode(base_name):
    """Return (type, barcode): the 2nd and 3rd '_'-separated fields of base_name.

    Returns None when base_name has fewer than three fields, so that a stray
    file in the cluster directory does not abort the whole run.
    """
    fields = base_name.split('_')
    if len(fields) < 3:
        return None
    return fields[1], fields[2]


def group_cluster_files(paths):
    """Group file paths by the (type, barcode) encoded in their base names.

    Returns a tuple (count_files, dict_files, skipped):
    - count_files: Counter mapping type -> number of recognised files
    - dict_files: dict mapping (type, barcode) -> list of base names
    - skipped: base names that do not follow the '<x>_<type>_<barcode>...' form
    """
    count_files = Counter()
    dict_files = defaultdict(list)
    skipped = []
    for cur_file in paths:
        cur_file_base_name = cur_file.strip().split('/')[-1]
        type_and_bc = parse_type_and_barcode(cur_file_base_name)
        if type_and_bc is None:
            skipped.append(cur_file_base_name)
            continue
        count_files[type_and_bc[0]] += 1
        dict_files[type_and_bc].append(cur_file_base_name)
    return count_files, dict_files, skipped


def move_file(src, dst_dir):
    """Move src into dst_dir with 'mv'; return True when mv exits with status 0.

    The arguments are passed as a list (no shell), so names containing
    spaces or shell metacharacters are handled literally.
    """
    import subprocess
    return subprocess.call(['mv', src, dst_dir]) == 0


def main(argv):
    """Clean the cluster logs and move the cluster output files back.

    argv is expected to be [script name, cluster directory]; with any other
    length only the usage line is printed.
    """
    if len(argv) != 2:
        print(auto_file_name+': usage: '+auto_file_name+' <cluster directory (in ../templates/)>')
        return

    # 1. clean the cluster logs in src directory (normally the current directory)
    os.system('rm -f p3_cluster_align_aux.py.e* p3_cluster_align_aux.py.o*')
    print('cluster logs p3_cluster_align_aux.py.e* and p3_cluster_align_aux.py.o* deleted')

    # 2. move the temp and aligned reads files from the cluster directory
    #    to their specific temp/align directory
    relative_path_to_cluster_dir = str(argv[1])
    prefix_date_and_id = cluster_prefix(relative_path_to_cluster_dir)
    if prefix_date_and_id is None:
        # Exit with a clear message instead of an IndexError traceback.
        sys.exit(auto_file_name+': error: "'+relative_path_to_cluster_dir
                 + '" is not a cluster-* directory')
    if not relative_path_to_cluster_dir.endswith('/'):
        relative_path_to_cluster_dir += '/'
    print(prefix_date_and_id)

    list_files = glob.glob(relative_path_to_cluster_dir+'*')
    total_nb_files_in_cluster_dir = len(list_files)
    print(total_nb_files_in_cluster_dir)

    count_files, dict_files, skipped = group_cluster_files(list_files)
    print('#files in cluster directory: '+str(count_files))
    if skipped:
        print('WARNING: files left in place (unexpected name): '+str(skipped))

    # move the files to their directory (sorted for a reproducible log)
    for type_and_bc in sorted(dict_files):
        new_file_directory = str(PATH_TO_TEMPLATES+'dir-'+prefix_date_and_id
                                 + '_'+type_and_bc[0]+'_'+type_and_bc[1]+'/')
        # presumably creates the directory when it does not exist yet
        # (helper defined in lib_tools) -- TODO confirm
        lt.check_and_create_directory(new_file_directory)
        for cur_file in dict_files[type_and_bc]:
            cur_path = relative_path_to_cluster_dir+cur_file
            print('move file '+cur_path+' to the directory: '+new_file_directory)
            # Only count the file as gone when mv actually succeeded.
            if move_file(cur_path, new_file_directory):
                count_files[type_and_bc[0]] -= 1
            else:
                print('WARNING: could not move '+cur_path)

    print('#remaining files in cluster directory: '+str(count_files))


if __name__ == '__main__':
    main(sys.argv)