1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4 #include "debug_tools.h"
/*
 * fitting - compare two windowed means taken from up_prb.
 *
 * Every byte that is read has 50 subtracted from it before it is summed.
 * With k running over 0 and 1:
 *
 *   "up" sum   : up_prb[u_off+u_size+k] + up_prb[u_off+u_size-k] - up_prb[k]
 *   "down" sum : up_prb[d_off+k]        + up_prb[d_off+k]        - up_prb[k]
 *
 * Each sum is divided by 5 (2 * window + 1) and the numerically smaller
 * mean is divided by the larger one.
 *
 * Returns 0 when that ratio is below 0.35, otherwise 1.
 *
 * NOTE(review): down_prb and d_size are never read, and the "down" sum adds
 * the same up_prb element twice - this looks like a copy/paste slip, but the
 * intended formula is not visible here; confirm before changing it.
 * NOTE(review): 50 is presumably a quality-score offset - verify with caller.
 * NOTE(review): no bounds checks are done; the caller must guarantee that
 * indices 0..1, d_off..d_off+1 and u_off+u_size-1..u_off+u_size+1 are valid
 * for up_prb. A zero divisor mean is not guarded against either.
 */
int fitting(char* up_prb, char* down_prb, int u_off, int d_off, int u_size, int d_size) {
    enum { HALF_WINDOW = 2, BASELINE = 50 };

    const int up_center = u_off + u_size;
    double up_mean = 0.0;
    double down_mean = 0.0;
    double ratio;
    int k;

    /* Not used by the computation below; see the review note above. */
    (void)down_prb;
    (void)d_size;

    for (k = 0; k < HALF_WINDOW; ++k) {
        /* Leading element of up_prb, subtracted from both sums. */
        const int lead = up_prb[k] - BASELINE;

        up_mean += up_prb[up_center + k] - BASELINE;
        up_mean += up_prb[up_center - k] - BASELINE;
        up_mean -= lead;

        down_mean += up_prb[d_off + k] - BASELINE;
        down_mean += up_prb[d_off + k] - BASELINE;
        down_mean -= lead;
    }

    up_mean /= (2 * HALF_WINDOW + 1);
    down_mean /= (2 * HALF_WINDOW + 1);

    /* Smaller mean over larger mean. */
    if (up_mean > down_mean) {
        ratio = down_mean / up_mean;
    } else {
        ratio = up_mean / down_mean;
    }

    /* Written as "< 0.35 ? 0 : 1" so any non-comparable ratio still yields 1. */
    return (ratio < 0.35) ? 0 : 1;
}
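/*
 * A read can contain three kinds of bracketed ambiguities:
 *
 * [AG]   two different characters
 * [A-]   gap in the second position
 * [-A]   gap in the first position
 *
 * In the filtered reads the first kind is removed (only the second
 * character is kept), while the two gap kinds are stored unchanged,
 * brackets included.
 */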
/*
 * remove_ambiguities - copy old_seq into new_seq, resolving bracket groups.
 *
 *   old_seq  NUL-terminated input sequence (only read).
 *   new_seq  output buffer; the result is never longer than the input, so
 *            strlen(old_seq) + 1 bytes are always sufficient.
 *
 * Handling of the input, left to right:
 *   - "[XY]" where neither X nor Y is '-': only Y is written to new_seq.
 *   - "[X-]" / "[-X]": the whole group, brackets included, is copied
 *     unchanged.
 *   - a ']' outside of a copied group is dropped.
 *   - every other character is copied unchanged.
 *
 * Returns the number of copied groups whose second inner character is '-'.
 *
 * Malformed input is handled without leaving the buffers: the look-ahead
 * after '[' never reads beyond the terminating NUL, and an unterminated
 * "[X-" / "[-X" group is copied up to the end of the input and then stops.
 */
int remove_ambiguities(char * old_seq, char* new_seq) {
    const size_t old_len = strlen(old_seq);
    size_t idx = 0;
    size_t new_idx = 0;
    int est_deletions_ctr = 0;

    while (idx < old_len) {
        const char cur = old_seq[idx];

        /* Closing bracket left over from a collapsed group: drop it. */
        if (cur == ']') {
            idx++;
            continue;
        }

        if (cur == '[') {
            /*
             * idx < old_len, so old_seq[idx + 1] is at worst the NUL.
             * Only look one further when that byte is not the terminator.
             */
            const char first = old_seq[idx + 1];
            const char second = (first != '\0') ? old_seq[idx + 2] : '\0';

            if (first == '-' || second == '-') {
                if (second == '-') {
                    est_deletions_ctr += 1;
                }

                /* Copy the group verbatim up to and including its ']'. */
                while (idx < old_len) {
                    const char c = old_seq[idx++];
                    new_seq[new_idx++] = c;
                    if (c == ']') {
                        break;
                    }
                }
                continue;
            }

            /*
             * Skip '[' and the first inner character; the second one is
             * copied by the next iteration and the ']' is dropped after it.
             */
            idx += 2;
            continue;
        }

        new_seq[new_idx++] = cur;
        idx++;
    }

    new_seq[new_idx] = '\0';
    return est_deletions_ctr;
}