int fitting(char* up_prb, char* up_prb_end, char* down_prb, char* down_prb_end);
+void remove_ambiguities(char * old_seq, int old_seq_size, char* new_seq);
+
/* Usage message; names the gff, reads and output arguments of ./filterReads. */
static char *info = "Usage is:\n./filterReads gff reads output";
/* Number of chars allocated for each de-ambiguated sequence buffer
 * (presumably the read length in bases -- TODO confirm). */
const int read_size = 36;
if (up_idx == up_size || down_idx == down_size)
break;
+ if ( up_strand != down_strand )
+ break;
+
strncpy(upstream_line,upstream[up_idx],256);
status = sscanf(upstream_line,"%d\t%d\t%s\t%d\t%c\t%d\t%d\t%d\t%d\t%s\t%s\t%s\n",
&up_chr,&up_pos,up_seq,&up_id,&up_strand,&up_mismatch,&up_occurrence,&up_sz,
&down_chr,&down_pos,down_seq,&down_id,&down_strand,&down_mismatch,&down_occurrence,&down_sz,
&down_cut,down_prb,down_cal_prb,down_chastity);
+ char* new_up_seq = malloc(sizeof(char)*read_size);
+ char* new_down_seq = malloc(sizeof(char)*read_size);
+
+ remove_ambiguities(up_seq,strlen(up_seq),new_up_seq);
+ remove_ambiguities(down_seq,strlen(down_seq),new_down_seq);
+
+ new_seq[0] = '\0';
new_prb[0] = '\0';
new_cal_prb[0] = '\0';
new_chastity[0] = '\0';
new_chr = up_chr;
new_strand = up_strand;
- strncat(new_seq,up_seq+(36-overlap),overlap);
+ strncat(new_seq,new_up_seq+(36-overlap),overlap);
strncat(new_prb,up_prb+(36-overlap),overlap);
strncat(new_cal_prb,up_cal_prb+(36-overlap),overlap);
strncat(new_chastity,up_chastity+(36-overlap),overlap);
- strncat(new_seq,down_seq+overlap,36-overlap);
+ strncat(new_seq,new_down_seq+overlap,36-overlap);
strncat(new_prb,down_prb+overlap,36-overlap);
strncat(new_cal_prb,down_cal_prb+overlap,36-overlap);
strncat(new_chastity,down_chastity+overlap,36-overlap);
new_chr = up_chr;
new_strand = up_strand;
- strncat(new_seq,up_seq,(36-overlap));
+ strncat(new_seq,new_up_seq,(36-overlap));
strncat(new_prb,up_prb,(36-overlap));
strncat(new_cal_prb,up_cal_prb,(36-overlap));
strncat(new_chastity,up_chastity,(36-overlap));
- strncat(new_seq,down_seq,overlap);
+ strncat(new_seq,new_down_seq,overlap);
strncat(new_prb,down_prb,overlap);
strncat(new_cal_prb,down_cal_prb,overlap);
strncat(new_chastity,down_chastity,overlap);
up_idx++;
down_idx++;
+
+ free(new_up_seq);
+ free(new_down_seq);
}
}
return 0;
}
/*
 * remove_ambiguities - collapse bracketed ambiguity groups in a sequence.
 *
 * Copies old_seq into new_seq character by character. A bracketed group
 * such as "[AC]" is replaced by its first member ('A'); the remaining
 * members and both brackets are dropped. Groups of any length are
 * accepted ("[A]", "[ACG]", ...); an empty group "[]" contributes nothing.
 * A group that is cut off by the end of the input is handled without
 * reading past old_seq_size.
 *
 * old_seq      - input characters; only the first old_seq_size are read.
 * old_seq_size - number of input characters to process; values <= 0
 *                copy nothing.
 * new_seq      - caller-supplied output buffer. At most old_seq_size
 *                bytes are written to it.
 *
 * NOTE: new_seq is NOT NUL-terminated by this function, so that a buffer
 * sized exactly for the expected output is not overrun. Callers must
 * bound their reads of new_seq themselves (e.g. via the length argument
 * of strncat).
 */
void remove_ambiguities(char * old_seq, int old_seq_size, char* new_seq) {
    int idx = 0;
    int new_idx = 0;

    if (!old_seq || !new_seq)
        return;

    while (idx < old_seq_size) {
        if (old_seq[idx] != '[') {
            /* Ordinary base: copy through unchanged. */
            new_seq[new_idx++] = old_seq[idx++];
            continue;
        }

        idx++; /* step over '[' */

        /* Keep only the first member of the group, if there is one. */
        if (idx < old_seq_size && old_seq[idx] != ']')
            new_seq[new_idx++] = old_seq[idx++];

        /* Discard the remaining members up to the closing bracket. */
        while (idx < old_seq_size && old_seq[idx] != ']')
            idx++;

        if (idx < old_seq_size)
            idx++; /* step over ']' */
    }
}
+
/*
- * TODO
- * - Check strand
- * - check for [AC] and similar entries
+ * TODO:
+ * - Check strand -> done simple (only if equal)
+ * - check for [AC] and similar entries -> done simple (see function
+ * remove_ambiguities, which replaces [XY] with its first entry)
*/