#include <sys/mman.h>
#include <sys/stat.h>
+#define WITH_QUALITIES 0
+
// the line format is defined as follows
// id chr strand seq splitpos size q1 q2 q3 geneId p1 p2 p3 p4 true_cut
const char* line_format = "%lu\t%d\t%c\t%s\t%d\t%d\t%s\t%s\t%s\t%s\t%d\t%d\t%d\t%d\t%d\n";
int status;
// create dictionary representing one line
- PyObject* entry_dict = PyDict_New();
- PyObject *id_py = PyInt_FromLong(id);
-
- // add entries of that line
- status = PyDict_SetItem(entry_dict, PyString_FromString("id"), id_py );
- status = PyDict_SetItem(entry_dict, PyString_FromString("chr"), PyInt_FromLong(chr) );
+ //PyObject* entry_dict = PyDict_New();
- PyObject *strand_py = PyString_FromString("--");
+ // alternative way using a list instead of a dictionary
+ PyObject* entry_list = PyList_New(15);
+ Py_INCREF( entry_list );
+ PyObject *id_py = PyInt_FromLong(id);
+ PyObject *strand_py = PyString_FromString("--");
if ( strand == 'D' )
strand_py = PyString_FromString("+");
if ( strand == 'P' )
strand_py = PyString_FromString("-");
+ //printf("before : %s\n",seq);
Py_ssize_t idx;
for(idx=0;idx<strlen(seq);idx++) {
- if (seq[idx] < 85)
+ if ( 65 <= seq[idx] && seq[idx] < 85)
seq[idx] = seq[idx]+32;
}
+ //printf("after : %s\n",seq);
- status = PyDict_SetItem(entry_dict, PyString_FromString("seq"), PyString_FromString(seq) );
-
- status = status || PyDict_SetItem(entry_dict, PyString_FromString("strand"), strand_py );
- status = PyDict_SetItem(entry_dict, PyString_FromString("splitpos"), PyInt_FromLong(splitpos) );
- status = PyDict_SetItem(entry_dict, PyString_FromString("read_size"), PyInt_FromLong(size) );
- status = PyDict_SetItem(entry_dict, PyString_FromString("true_cut"), PyInt_FromLong(true_cut) );
+ // add entries of that line
+
+ //status = PyDict_SetItem(entry_dict, PyString_FromString("id"), id_py );
+ //status = PyDict_SetItem(entry_dict, PyString_FromString("chr"), PyInt_FromLong(chr) );
+ //status = PyDict_SetItem(entry_dict, PyString_FromString("seq"), PyString_FromString(seq) );
+ //status = PyDict_SetItem(entry_dict, PyString_FromString("strand"), strand_py );
+ //status = PyDict_SetItem(entry_dict, PyString_FromString("splitpos"), PyInt_FromLong(splitpos) );
+ //status = PyDict_SetItem(entry_dict, PyString_FromString("read_size"), PyInt_FromLong(size) );
+ //status = PyDict_SetItem(entry_dict, PyString_FromString("true_cut"), PyInt_FromLong(true_cut) );
+
+ status = PyList_SetItem(entry_list, 0, id_py );
+ status = PyList_SetItem(entry_list, 1, PyInt_FromLong(chr) );
+ status = PyList_SetItem(entry_list, 2, PyString_FromString(seq) );
+ status = PyList_SetItem(entry_list, 3, strand_py );
+ status = PyList_SetItem(entry_list, 4, PyInt_FromLong(splitpos) );
+ status = PyList_SetItem(entry_list, 5, PyInt_FromLong(size) );
+ status = PyList_SetItem(entry_list, 6, PyInt_FromLong(true_cut) );
PyObject* prb_list = PyList_New(size);
PyObject* cal_prb_list = PyList_New(size);
PyObject* chastity_list = PyList_New(size);
+#if WITH_QUALITIES
for(idx=0;idx<size;idx++) {
status = PyList_SetItem( prb_list, idx, PyInt_FromLong(prb[idx]-50) );
status = PyList_SetItem( cal_prb_list, idx, PyInt_FromLong(cal_prb[idx]-64) );
status = PyList_SetItem( chastity_list, idx, PyInt_FromLong(chastity[idx]+10) );
}
- status = PyDict_SetItem(entry_dict, PyString_FromString("prb"), prb_list );
- status = PyDict_SetItem(entry_dict, PyString_FromString("cal_prb"), cal_prb_list );
- status = PyDict_SetItem(entry_dict, PyString_FromString("chastity"), chastity_list );
-
- status = PyDict_SetItem(entry_dict, PyString_FromString("gene_id"), PyString_FromString(geneId) );
-
- status = PyDict_SetItem(entry_dict, PyString_FromString("p_start"), PyInt_FromLong(p_start) );
- status = PyDict_SetItem(entry_dict, PyString_FromString("exon_stop"), PyInt_FromLong(exon_stop) );
- status = PyDict_SetItem(entry_dict, PyString_FromString("exon_start"), PyInt_FromLong(exon_start) );
- status = PyDict_SetItem(entry_dict, PyString_FromString("p_stop"), PyInt_FromLong(p_stop) );
+ status = PyList_SetItem(entry_list, 7, prb_list );
+ status = PyList_SetItem(entry_list, 8, cal_prb_list );
+ status = PyList_SetItem(entry_list, 9, chastity_list );
+#else
+ status = PyList_SetItem(entry_list, 7, PyString_FromString("") );
+ status = PyList_SetItem(entry_list, 8, PyString_FromString("") );
+ status = PyList_SetItem(entry_list, 9, PyString_FromString("") );
+#endif
+
+
+ status = PyList_SetItem(entry_list, 10, PyString_FromString(geneId) );
+ status = PyList_SetItem(entry_list, 11, PyInt_FromLong(p_start) );
+ status = PyList_SetItem(entry_list, 12, PyInt_FromLong(exon_stop) );
+ status = PyList_SetItem(entry_list, 13, PyInt_FromLong(exon_start) );
+ status = PyList_SetItem(entry_list, 14, PyInt_FromLong(p_stop) );
+
+ //status = PyDict_SetItem(entry_dict, PyString_FromString("prb"), prb_list );
+ //status = PyDict_SetItem(entry_dict, PyString_FromString("cal_prb"), cal_prb_list );
+ //status = PyDict_SetItem(entry_dict, PyString_FromString("chastity"), chastity_list );
+ //status = PyDict_SetItem(entry_dict, PyString_FromString("gene_id"), PyString_FromString(geneId) );
+ //status = PyDict_SetItem(entry_dict, PyString_FromString("p_start"), PyInt_FromLong(p_start) );
+ //status = PyDict_SetItem(entry_dict, PyString_FromString("exon_stop"), PyInt_FromLong(exon_stop) );
+ //status = PyDict_SetItem(entry_dict, PyString_FromString("exon_start"), PyInt_FromLong(exon_start) );
+ //status = PyDict_SetItem(entry_dict, PyString_FromString("p_stop"), PyInt_FromLong(p_stop) );
// now save the dictionary representing one line in the dictionary
// representing the whole file
- status = PyDict_SetItem(result_dict, id_py, entry_dict);
+ //status = PyDict_SetItem(result_dict, id_py, entry_dict);
+
+ status = PyDict_SetItem(result_dict, id_py, entry_list);
if (status != 0) {
PyErr_Warn(PyExc_Warning, "qparser.parse_reads: Failed to add item!");
}
+ Py_DECREF( entry_list );
// decrement the reference count as we are finished with the local
// modification of the object
Py_DECREF( result_dict );
readCtr += 1;
}
+ // create a dictionary that maps attributes to their respective index in the
+ // list
+ PyObject* map_dict = PyDict_New();
+
+ status = PyDict_SetItem(map_dict, PyString_FromString("id"), PyInt_FromLong(0) );
+ status = PyDict_SetItem(map_dict, PyString_FromString("chr"), PyInt_FromLong(1) );
+ status = PyDict_SetItem(map_dict, PyString_FromString("seq"), PyInt_FromLong(2) );
+ status = PyDict_SetItem(map_dict, PyString_FromString("strand"), PyInt_FromLong(3) );
+ status = PyDict_SetItem(map_dict, PyString_FromString("splitpos"), PyInt_FromLong(4) );
+ status = PyDict_SetItem(map_dict, PyString_FromString("read_size"), PyInt_FromLong(5) );
+ status = PyDict_SetItem(map_dict, PyString_FromString("true_cut"), PyInt_FromLong(6) );
+ status = PyDict_SetItem(map_dict, PyString_FromString("prb"), PyInt_FromLong(7) );
+ status = PyDict_SetItem(map_dict, PyString_FromString("cal_prb"), PyInt_FromLong(8) );
+ status = PyDict_SetItem(map_dict, PyString_FromString("chastity"), PyInt_FromLong(9) );
+ status = PyDict_SetItem(map_dict, PyString_FromString("gene_id"), PyInt_FromLong(10) );
+ status = PyDict_SetItem(map_dict, PyString_FromString("p_start"), PyInt_FromLong(11) );
+ status = PyDict_SetItem(map_dict, PyString_FromString("exon_stop"), PyInt_FromLong(12) );
+ status = PyDict_SetItem(map_dict, PyString_FromString("exon_start"), PyInt_FromLong(13) );
+ status = PyDict_SetItem(map_dict, PyString_FromString("p_stop"), PyInt_FromLong(14) );
+
+ // create result tuple
+ PyObject *result_tuple = PyTuple_Pack( 2, map_dict, result_dict );
+
// clean up
status = munmap(reads_area,reads_filesize);
if(status != 0)
if ( result_size != readCtr )
printf("Error: size of dict does not equal number of reads!\n");
- return result_dict;
+ return result_tuple;
}