+ updated filtering
[qpalma.git] / tools / data_tools / filterReads.c
1 ////////////////////////////////////////////////////////////////////////////////
2 // The purpose of this program is to read a gff and a
3 // solexa reads file and create a data set used by QPalma.
4 //
5 //
6 //
7 // Notes:
8 //
9 // - Both the read indices and the gff gene indices are one-based
10 //
11 //
12 ////////////////////////////////////////////////////////////////////////////////
13
14 #include <sys/mman.h>
15 #include <sys/stat.h>
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <unistd.h>
20 #include <math.h>
21
22 #include "debug_tools.h"
23 #include "join.h"
24 #include "datastructures.h"
25
26 #define _FILE_OFFSET_BITS == 64
27
28 const char* line_format = "%d\t%d\t%s\t%lu\t%c\t%d\t%d\t%d\t%d\t%s\t%s\t%s\n";
29
30 const int read_size = 36;
31
32 const int min_overlap = 1;
33 const int max_overlap = 35;
34
35 unsigned long read_nr = 1;
36 unsigned long unfiltered_read_nr = 1;
37
38 int parse_gff(char* filename,FILE* fid,struct gene*** allGenes);
39 void sort_genes(struct gene*** allGenes, int numGenes);
40 void process_reads(FILE* reads_fs,struct gene*** allGenes,int numGenes, FILE* out_fs, FILE* unfiltered_out_fs);
41 void combine_info(int exon_stop, int exon_start, Read** upstream, int up_size, Read** downstream, int down_size, FILE* out_fs,const char* gene_id, int exon_idx);
42 int join_reads(int exon_stop, int exon_start, Read* up_read, Read* down_read, FILE* out_fs,const char* gene_id, int exon_idx);
43 int fitting(char* up_prb, char* down_prb, int u_off, int d_off, int u_size, int d_size);
44 int remove_ambiguities(char * old_seq, char* new_seq);
45 Tuple join_seq(char* new_seq, char* up_seq,int u_off, int u_size, char* down_seq, int d_off, int d_size, int d_range);
46
47 static char *info = "Usage is:\n./filterReads gff reads output";
48
49 /*
50 * Some constants specifying the exact behavior of the filter
51 *
52 */
53
54 //#define _FDEBUG_ 0
55 //#define DEBUG_READ 38603
56
57 int combined_reads = 0;
58
59 void process_reads(FILE* reads_fs,struct gene*** allGenes,int numGenes, FILE* out_fs, FILE* unfiltered_out_fs) {
60 int status;
61
62 int buffer_size= 64;
63 int chr = 0;
64 int pos = 0;
65 char* seq = malloc(sizeof(char)*buffer_size);
66 unsigned long id = 0;
67 char strand = ' ';
68 int mismatch = 0;
69 int occurrence = 0;
70 int size = 0;
71 int cut = 0;
72 char* prb = malloc(sizeof(char)*buffer_size);
73 char* cal_prb = malloc(sizeof(char)*buffer_size);
74 char* chastity = malloc(sizeof(char)*buffer_size);
75
76 int reads_fid = fileno(reads_fs);
77 struct stat reads_stat;
78 if ( fstat(reads_fid,&reads_stat) == -1) {
79 perror("fstat");
80 exit(EXIT_FAILURE);
81 }
82 off_t reads_filesize = reads_stat.st_size;
83 printf("Reads file is of size %lu bytes\n",(unsigned long) reads_filesize);
84 int numReads = reads_filesize / 178.0;
85
86 void *reads_area = mmap (NULL,reads_filesize,PROT_READ,MAP_PRIVATE,reads_fid,0);
87 if (reads_area == MAP_FAILED) {
88 perror("mmap");
89 exit(EXIT_FAILURE);
90 }
91 close(reads_fid);
92 printf("Successfully mapped %lu bytes of reads file into memory\n",(unsigned long)reads_filesize);
93
94 char* lineBeginPtr = (char*) reads_area;
95 char* lineEndPtr = (char*) reads_area;
96 char* end_of_mapped_area = ((char*) reads_area) + reads_filesize;
97
98 while (*lineEndPtr != '\n' && lineEndPtr != end_of_mapped_area) lineEndPtr++;
99 lineEndPtr++;
100
101 char* current_line = malloc(sizeof(char)*512);
102 memset(current_line,0,512);
103
104 int SIZE = 500;
105 // initialize boundary arrays
106 Read** upstream_overlap = malloc(sizeof(Read*)*SIZE);
107 Read** downstream_overlap= malloc(sizeof(Read*)*SIZE);
108 int uov,dov;
109 uov = dov = 0;
110
111 int skippedLinesCounter = 0;
112
113 int prev_exon_start = -1;
114 int prev_exon_stop = -1;
115 int cur_exon_start = -1;
116
117 unsigned long line_size = lineEndPtr - lineBeginPtr;
118 //printf("distance is %lu\n",line_size);
119 strncpy(current_line,lineBeginPtr,line_size);
120 current_line[line_size] = '\0';
121 //printf("%s test",current_line);
122
123 int gene_idx = 0;
124 int exon_idx = 1;
125 struct gene* currentGene = (*allGenes)[gene_idx];
126 char* gene_id = currentGene->id;
127
128 int skippedReadCtr = 0;
129 int uselessReadCtr = 0;
130 int exonicReadCtr = 0;
131 int endPrevCtr = 0;
132 int prevOverlapCtr = 0;
133 int currentStartCtr = 0;
134 int currentOverlapCtr = 0;
135 int multioccurReadCtr = 0;
136
137 Read* currentRead;
138 int up_idx, down_idx;
139
140 int readCtr = 0;
141 int read_start, read_stop;
142 // start of the parsing loop
143 while(1) {
144 if (gene_idx == numGenes || strcmp(current_line,"") == 0)
145 break;
146
147 gene_id = currentGene->id;
148
149 if (readCtr != 0 && readCtr % 1000000 == 0)
150 printf("Processed %d/%d genes and %d/%d reads.\n",gene_idx,numGenes,readCtr,numReads);
151
152 // pos of the reads is one-based
153 status = sscanf(current_line,line_format,&chr,&pos,seq,&id,&strand,&mismatch,&occurrence,&size,&cut,prb,cal_prb,chastity);
154 if (status < 12) {
155 skippedLinesCounter++;
156 goto next_read;
157 }
158
159 // if the read is occurring several times elsewhere then get rid of it
160 if ( occurrence != 1 ) {
161 multioccurReadCtr++;
162 goto next_read;
163 }
164
165 // define read start and stop positions
166 read_start = pos;
167 read_stop = pos + read_size-1;
168
169 if( strand != 'D' )
170 goto next_read;
171
172 FA(strlen(seq) >= read_size);
173
174 FA(currentGene != 0);
175
176 if ( currentGene->start <= read_start && read_stop <= currentGene->stop) { // read is within gene borders
177
178 exon_label:
179
180 if (exon_idx == currentGene->num_exons) {
181 gene_idx++;
182 exon_idx = 1;
183 currentGene = (*allGenes)[gene_idx];
184 while( currentGene == 0 && gene_idx < numGenes) {
185 currentGene = (*allGenes)[gene_idx];
186 gene_idx++;
187 }
188 continue;
189 }
190
191 prev_exon_start = currentGene->exon_starts[exon_idx-1];
192 prev_exon_stop = currentGene->exon_stops[exon_idx-1];
193 cur_exon_start = currentGene->exon_starts[exon_idx];
194
195 //printf("exon: %d %d %d\t pos: %d\n",prev_exon_start,prev_exon_stop,cur_exon_start,pos);
196
197 if ( (cur_exon_start - prev_exon_stop - 1) < min_overlap || cur_exon_start < read_start ) { // go to next exon
198
199 if (uov != 0 && dov != 0)
200 combine_info(prev_exon_stop,cur_exon_start,upstream_overlap,uov,downstream_overlap,dov,out_fs,gene_id,exon_idx);
201
202 for(up_idx=0;up_idx<uov;up_idx++) {
203 free_read(upstream_overlap[up_idx]);
204 free(upstream_overlap[up_idx]);
205 }
206
207 for(down_idx=0;down_idx<dov;down_idx++) {
208 free_read(downstream_overlap[down_idx]);
209 free(downstream_overlap[down_idx]);
210 }
211
212 uov = dov = 0;
213
214 exon_idx++;
215 goto exon_label;
216 }
217
218 if ( prev_exon_start <= read_start && read_stop <= prev_exon_stop ) { // read is inside previous exon
219
220 // output of unused i.e. unspliced reads
221 fprintf(unfiltered_out_fs,"%lu\t%d\t%c\t%s\t%d\t%d\t%s\t%s\t%s\t%s\t%d\t%d\t%d\t%d\n",
222 unfiltered_read_nr,chr,strand,seq,0,read_size,prb,cal_prb,chastity,gene_id,read_start,-1,-1,read_stop);
223 unfiltered_read_nr++;
224
225 exonicReadCtr++;
226 goto next_read;
227 }
228
229 if ( read_stop < prev_exon_stop ) // go to next read
230 goto next_read;
231
232 // if this position is reached the read is somehow overlapping or
233 // exactly on the exon boundary.
234 if ( (prev_exon_stop - read_start + 1) >= min_overlap && (prev_exon_stop - read_start + 1) <= max_overlap ) { // read overlaps with previous exon boundary
235 //printf("%s\n",current_line);
236 prevOverlapCtr++;
237 currentRead = create_read(chr,read_start,seq,id,strand,mismatch,occurrence,size,cut,prb,cal_prb,chastity);
238 upstream_overlap[uov] = currentRead;
239 uov++;
240 goto next_read;
241 }
242
243 if ( ( read_stop - cur_exon_start + 1) >= min_overlap && (read_stop - cur_exon_start + 1) <= max_overlap ) { // read overlaps with current exon boundary
244 //printf("%s\n",current_line);
245 currentOverlapCtr++;
246 currentRead = create_read(chr,read_start,seq,id,strand,mismatch,occurrence,size,cut,prb,cal_prb,chastity);
247 downstream_overlap[dov] = currentRead;
248 dov++;
249 goto next_read;
250 }
251
252 uselessReadCtr++;
253 goto next_read; // read was not useful i.e. not overlapping/starting at exon boundaries
254
255 } else { // read is not within gene borders
256
257
258 if (uov != 0 && dov != 0)
259 combine_info(prev_exon_stop,cur_exon_start,upstream_overlap,uov,downstream_overlap,dov,out_fs,gene_id,exon_idx);
260
261 for(up_idx=0;up_idx<uov;up_idx++) {
262 free_read(upstream_overlap[up_idx]);
263 free(upstream_overlap[up_idx]);
264 }
265
266 for(down_idx=0;down_idx<dov;down_idx++) {
267 free_read(downstream_overlap[down_idx]);
268 free(downstream_overlap[down_idx]);
269 }
270
271 uov = dov = 0;
272
273 if ( currentGene->stop < read_stop ) { // go to next gene
274 gene_idx++;
275 exon_idx = 1;
276 currentGene = (*allGenes)[gene_idx];
277 while( currentGene == 0 && gene_idx < numGenes) {
278 currentGene = (*allGenes)[gene_idx];
279 gene_idx++;
280 }
281 //printf("currentGene->start / currentGene->stop %d/%d pos is %d\n",currentGene->start,currentGene->stop,pos);
282 continue;
283 }
284
285 if ( read_start < currentGene->start ) { // go to next read
286 skippedReadCtr++;
287
288 next_read:
289
290 lineBeginPtr = lineEndPtr;
291 while (*(char*)lineEndPtr != '\n' && lineEndPtr != end_of_mapped_area) lineEndPtr++;
292 lineEndPtr++;
293 readCtr += 1;
294 current_line = strncpy(current_line,lineBeginPtr,lineEndPtr-lineBeginPtr);
295 current_line[lineEndPtr-lineBeginPtr] = '\0';
296 continue;
297 }
298 }
299 }
300
301 if (uov != 0 && dov != 0)
302 combine_info(prev_exon_stop,cur_exon_start,upstream_overlap,uov,downstream_overlap,dov,out_fs,gene_id,exon_idx);
303
304 for(up_idx=0;up_idx<uov;up_idx++) {
305 free_read(upstream_overlap[up_idx]);
306 free(upstream_overlap[up_idx]);
307 }
308
309 for(down_idx=0;down_idx<dov;down_idx++) {
310 free_read(downstream_overlap[down_idx]);
311 free(downstream_overlap[down_idx]);
312 }
313
314 uov = dov = 0;
315
316 free(upstream_overlap);
317 free(downstream_overlap);
318
319 printf("Processed %d reads in total while %d reads where skipped.\n",readCtr,skippedLinesCounter);
320 printf("%d were totally intronic. %d were outside of genes and %d were occurring on several positions.\n",uselessReadCtr,skippedReadCtr,multioccurReadCtr);
321 printf("%d reads were useless in total\n",uselessReadCtr+skippedReadCtr+multioccurReadCtr);
322 printf("%d reads were totally exonic\n",exonicReadCtr);
323 printf("%d reads overlap with prev. exon. %d reads overlap with current exon\n",prevOverlapCtr,currentOverlapCtr);
324 printf("%d reads where newly combined from two original reads\n",combined_reads);
325 printf("Total used reads: %d\n",exonicReadCtr+endPrevCtr+prevOverlapCtr+currentStartCtr+currentOverlapCtr);
326
327 status = munmap(reads_area,reads_filesize);
328 if(status != 0)
329 perror("munmap");
330
331 free(current_line);
332 free(seq);
333 free(prb);
334 free(cal_prb);
335 free(chastity);
336 }
337
338
339 void combine_info(int exon_stop, int exon_start, Read** upstream, int up_size, Read** downstream, int down_size, FILE* out_fs,const char* gene_id, int exon_idx) {
340 int up_idx, down_idx, success;
341
342 char* up_used_flag = calloc(up_size,sizeof(char));
343 char* down_used_flag = calloc(down_size,sizeof(char));
344
345 Read* currentUpRead;
346 Read* currentDownRead;
347
348 for(up_idx=0;up_idx<up_size;up_idx++) {
349 if( up_used_flag[up_idx] == 1)
350 continue;
351
352 currentUpRead = upstream[up_idx];
353
354 for(down_idx=0;down_idx<down_size;down_idx++) {
355
356 if( up_used_flag[up_idx] == 1 || down_used_flag[down_idx] == 1)
357 continue;
358
359 currentDownRead = downstream[down_idx];
360
361 if(currentUpRead->strand != currentDownRead->strand)
362 continue;
363
364 success = join_reads(exon_stop,exon_start,currentUpRead,currentDownRead,out_fs,gene_id,exon_idx);
365
366 if(success == 1) {
367 up_used_flag[up_idx] = 1;
368 down_used_flag[down_idx] = 1;
369 }
370 }
371 }
372
373 free(up_used_flag);
374 free(down_used_flag);
375 }
376
377 /*
378 *
379 *
380 *
381 * ACGTACGTCA GTXXXXXXXXAG ACGTAGACGT
382 * p1 e1 e2 p2
383 *
384 *
385 *
386 *
387 */
388
389 int join_reads(int exon_stop, int exon_start, Read* up_read, Read* down_read, FILE* out_fs,const char* gene_id, int exon_idx) {
390 // range of possible sequence length on exon side
391 int up_read_start = up_read->pos;
392 //int up_read_stop = up_read->pos+read_size-1;
393
394 int down_read_start = down_read->pos;
395 int down_read_stop = down_read->pos+read_size-1;
396
397 int up_range = exon_stop - up_read_start + 1;
398 int down_range = down_read_stop - exon_start + 1;
399 int retval;
400
401 int u_size, d_size;
402 u_size = d_size = -1;
403
404 if(up_range+down_range < read_size)
405 return 0;
406
407 if (read_nr % 2 != 0) {
408 d_size = down_range;
409 u_size = read_size - d_size;
410 } else {
411 u_size = up_range;
412 d_size = read_size - u_size;
413 }
414
415 if( u_size > up_range || d_size > down_range)
416 return 0;
417
418 const int p_start = exon_stop - u_size + 1;
419 const int p_stop = exon_start + d_size - 1;
420
421 int u_off = p_start - up_read_start;
422 int d_off = exon_start - down_read_start;
423
424 FA(u_off >= 0);
425 FA(d_off >= 0);
426
427 FA( exon_stop - p_start + p_stop - exon_start + 2 == read_size);
428 FA( u_size + d_size == read_size );
429
430 // seems reasonable up to here
431
432 int buf_size = 4*read_size;
433 char* new_seq = malloc(sizeof(char)*buf_size);
434 memset(new_seq,'z',sizeof(char)*buf_size);
435
436 Tuple jinfo = join_seq(new_seq,up_read->seq,u_off,u_size,down_read->seq,d_off,d_size,down_range);
437
438 int cut_pos = jinfo.first;
439 int additional_pos = jinfo.second;
440
441 buf_size = read_size+1+additional_pos;
442
443 char* new_prb = malloc(sizeof(char)*buf_size);
444 char* new_cal_prb = malloc(sizeof(char)*buf_size);
445 char* new_chastity = malloc(sizeof(char)*buf_size);
446
447 if( jinfo.first == -1 ) {
448 retval = 0;
449 goto free;
450 }
451
452 if( additional_pos > (down_range - d_size) ) {
453 retval = 0;
454 goto free;
455 }
456
457 strncpy(new_prb, up_read->prb+u_off, u_size);
458 strncpy(new_prb+u_size, down_read->prb+d_off, d_size+additional_pos);
459 new_prb[read_size+additional_pos] = '\0';
460
461 strncpy(new_cal_prb, up_read->cal_prb+u_off, u_size);
462 strncpy(new_cal_prb+u_size, down_read->cal_prb+d_off, d_size+additional_pos);
463 new_cal_prb[read_size+additional_pos] = '\0';
464
465 strncpy(new_chastity, up_read->chastity+u_off, u_size);
466 strncpy(new_chastity+u_size, down_read->chastity+d_off, d_size+additional_pos);
467 new_chastity[read_size+additional_pos] = '\0';
468
469
470 int status = 1; //fitting(up_read->prb,down_read->prb,u_off,d_off,u_size,d_size);
471
472 if(status != 1) {
473 retval = 0;
474 goto free;
475 }
476
477 retval = status;
478
479 //fprintf(out_fs,"%d\t%d\t%c\t%s\t%d\t%d\t%s\t%s\t%s\t%s\t%d\t%d\t%d\t%d\t%s\t%s\n",
480 // read_nr,up_read->chr,up_read->strand,new_seq,cut_pos,read_size,new_prb,new_cal_prb,new_chastity,gene_id,p_start,exon_stop,exon_start,p_stop,up_read->seq,down_read->seq);
481
482 //fprintf(out_fs,"%lu\t%d\t%c\t%s\t%d\t%d\t%s\t%s\t%s\t%s\t%d\t%d\t%d\t%d\t%d\n",
483 // read_nr,up_read->chr,up_read->strand,new_seq,cut_pos,read_size,new_prb,new_cal_prb,new_chastity,gene_id,p_start,exon_stop,exon_start,p_stop,u_size);
484
485 fprintf(out_fs,"%lu\t%d\t%c\t%s\t%d\t%d\t%s\t%s\t%s\t%s\t%d\t%d\t%d\t%d\t%d\n",
486 read_nr,up_read->chr,up_read->strand,new_seq,cut_pos,read_size,new_prb,new_cal_prb,new_chastity,gene_id,p_start,exon_stop,exon_start,p_stop,u_size);
487
488 read_nr++;
489 combined_reads++;
490
491 free:
492 free(new_seq);
493 free(new_prb);
494 free(new_cal_prb);
495 free(new_chastity);
496
497 return retval;
498 }
499
500
501 void print_read(Read* cRead) {
502 printf(line_format,
503 cRead->chr, cRead->pos, cRead->seq, cRead->id,
504 cRead->strand, cRead->mismatch, cRead->occurrence,
505 cRead->size, cRead->cut, cRead->prb, cRead->cal_prb,
506 cRead->chastity);
507 }
508
509 /*
510 * The mainloop
511 *
512 */
513
514 int main(int argc, char* argv[]) {
515
516 if(argc != 7) {
517 printf("%s\n",info);
518 exit(EXIT_FAILURE);
519 }
520
521 int status;
522 int filenameSize = 256;
523 char* gff_filename = malloc(sizeof(char)*filenameSize);
524 char* reads_filename = malloc(sizeof(char)*filenameSize);
525 char* output_filename = malloc(sizeof(char)*filenameSize);
526 char* unfiltered_output_filename = malloc(sizeof(char)*filenameSize);
527
528 strncpy(gff_filename,argv[1],filenameSize);
529 strncpy(reads_filename,argv[2],filenameSize);
530 strncpy(output_filename,argv[3],filenameSize);
531 strncpy(unfiltered_output_filename,argv[4],filenameSize);
532
533 FILE *gff_fs = fopen(gff_filename,"r");
534 FILE *reads_fs = fopen(reads_filename,"r");
535 FILE *out_fs = fopen(output_filename,"w");
536 FILE *unfiltered_out_fs = fopen(unfiltered_output_filename,"w");
537
538 read_nr = strtoul(argv[5],NULL,10);
539 read_nr++;
540
541 unfiltered_read_nr = strtoul(argv[6],NULL,10);
542 unfiltered_read_nr++;
543
544 if(gff_fs == NULL) {
545 printf("Error: Could not open file: %s",gff_filename);
546 exit(EXIT_FAILURE);
547 }
548
549 if(reads_fs == NULL) {
550 printf("Error: Could not open file: %s",reads_filename);
551 exit(EXIT_FAILURE);
552 }
553
554 if(out_fs == NULL) {
555 printf("Error: Could not open file: %s",output_filename);
556 exit(EXIT_FAILURE);
557 }
558
559 if(unfiltered_out_fs == NULL) {
560 printf("Error: Could not open file: %s",unfiltered_output_filename);
561 exit(EXIT_FAILURE);
562 }
563
564 struct gene** allGenes;
565 int numGenes = parse_gff(gff_filename,gff_fs,&allGenes);
566 status = fclose(gff_fs);
567 free(gff_filename);
568 if(status != 0)
569 printf("closing of gff filestream failed!\n");
570
571 printf("Successfully parsed gff file! Found %d genes.\n",numGenes);
572
573 // check if allGenes is sorted. if not throw away those genes that do not
574 // occur in the sorted order
575 int g_idx;
576 struct gene* currentGene = 0;
577 int nulled_genes=0;
578 int old_gene_stop = -1;
579 for(g_idx=0;g_idx<numGenes;g_idx++) {
580 currentGene = allGenes[g_idx];
581
582 if (! (currentGene->start < currentGene->stop))
583 printf("Invalid positions for gene %s!\n",currentGene->id);
584
585 if (! (old_gene_stop < currentGene->start ) ) {
586 old_gene_stop = currentGene->stop;
587 allGenes[g_idx] = 0;
588 nulled_genes++;
589 continue;
590 }
591 old_gene_stop = currentGene->stop;
592 }
593
594 printf("Found %d unordered genes.\n",nulled_genes);
595 int gidx, eidx;
596 int exon_cov = 0;
597 for(gidx=0;gidx<numGenes;gidx++) {
598 if (allGenes[gidx] == 0)
599 continue;
600
601 for(eidx=0;eidx<allGenes[gidx]->num_exons;eidx++) {
602 exon_cov += allGenes[gidx]->exon_stops[eidx] - allGenes[gidx]->exon_starts[eidx];
603 }}
604 printf("Exon coverage is %f\n",(double)exon_cov/30432563);
605
606 process_reads(reads_fs,&allGenes,numGenes,out_fs,unfiltered_out_fs);
607
608 for(g_idx=0;g_idx<numGenes;g_idx++) {
609 if(allGenes[g_idx] != 0)
610 free_gene(allGenes[g_idx]);
611 free(allGenes[g_idx]);
612
613 }
614 free(allGenes);
615
616 status = fclose(reads_fs);
617 status = fclose(out_fs);
618 if(status != 0)
619 perror("fclose");
620
621 free(reads_filename);
622 free(output_filename);
623
624 return 0;
625 }
626
627 /*
628 * TODO:
629 * - Check strand -> done simple (only if equal)
630 * - check for [AC] and similar entries -> done simple (see function
631 * - remove_ambiguities (exchanges [XY] by the second entry)
632 */
633
634 #ifdef _FDEBUG_
635 if(read_nr == DEBUG_READ) {
636 printf("read nr %lu",read_nr);
637 printf("pos: %d %d %d %d\n",p_start,exon_stop,exon_start,p_stop);
638 printf("u/d range: %d %d\n",up_range,down_range);
639 printf("u/d size: %d %d\n",u_size,d_size);
640 printf("u/d off: %d %d\n",u_off,d_off);
641 printf("add_pos/down_range-d_size: %d %d\n",additional_pos,(down_range - d_size));
642
643 printf("******************\n");
644
645 printf("%s\n",up_read->seq);
646 //printf("%s\n",new_up_seq);
647
648 printf("******************\n");
649
650 printf("%s\n",down_read->seq);
651 //printf("%s\n",new_down_seq);
652
653 printf("******************\n");
654 printf("%s\n",new_seq);
655 printf("%s\n",new_prb);
656 printf("%s\n",new_cal_prb);
657 printf("%s\n",new_chastity);
658 }
659 #endif // _FDEBUG_