+ fixed seq size bug
authorfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Thu, 15 May 2008 12:06:01 +0000 (12:06 +0000)
committerfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Thu, 15 May 2008 12:06:01 +0000 (12:06 +0000)
+ added check to ensure that this c code does the same as the old python code

git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@9024 e1793c9e-67f9-0310-80fc-b846ff1f7b36

cparser/qparser.c
cparser/test.py

index 07333a3..a5f921a 100644 (file)
@@ -28,6 +28,10 @@ char* geneId   = 0;
 
 
 static int set_item_from_line(PyObject *result_dict, const char* current_line) {
+   
+   // increment the reference count for the result_dict object because we want
+   // ot modify it
+   Py_INCREF( result_dict );
 
    //printf("current line is %s\n",current_line);
    int entries_found = sscanf(current_line,line_format,&id,&chr,&strand,seq,&splitpos,&size,prb,cal_prb,chastity,geneId,&p_start,&exon_stop,&exon_start,&p_stop,&true_cut);
@@ -56,16 +60,23 @@ static int set_item_from_line(PyObject *result_dict, const char* current_line) {
    if ( strand == 'P' )
       strand_py = PyString_FromString("-");
 
-   status = status || PyDict_SetItem(entry_dict, PyString_FromString("strand"),     strand_py );
+   Py_ssize_t idx;
+   for(idx=0;idx<strlen(seq);idx++) {
+      if (seq[idx] < 85)
+         seq[idx] = seq[idx]+32;
+   }
+
    status = PyDict_SetItem(entry_dict, PyString_FromString("seq"),        PyString_FromString(seq) );
+
+   status = status || PyDict_SetItem(entry_dict, PyString_FromString("strand"),     strand_py );
    status = PyDict_SetItem(entry_dict, PyString_FromString("splitpos"),   PyInt_FromLong(splitpos) );
-   status = PyDict_SetItem(entry_dict, PyString_FromString("size"),       PyInt_FromLong(size) );
+   status = PyDict_SetItem(entry_dict, PyString_FromString("read_size"),       PyInt_FromLong(size) );
+   status = PyDict_SetItem(entry_dict, PyString_FromString("true_cut"),       PyInt_FromLong(true_cut) );
 
    PyObject* prb_list = PyList_New(size);
    PyObject* cal_prb_list = PyList_New(size);
    PyObject* chastity_list = PyList_New(size);
 
-   Py_ssize_t idx;
    for(idx=0;idx<size;idx++) {
       status = PyList_SetItem( prb_list, idx, PyInt_FromLong(prb[idx]-50) );
       status = PyList_SetItem( cal_prb_list, idx, PyInt_FromLong(cal_prb[idx]-64) );
@@ -75,7 +86,9 @@ static int set_item_from_line(PyObject *result_dict, const char* current_line) {
    status = PyDict_SetItem(entry_dict, PyString_FromString("prb"),        prb_list );
    status = PyDict_SetItem(entry_dict, PyString_FromString("cal_prb"),    cal_prb_list );
    status = PyDict_SetItem(entry_dict, PyString_FromString("chastity"),   chastity_list );
-   //status = PyDict_SetItem(entry_dict, FromString("gene_id"), id_py );
+
+   status = PyDict_SetItem(entry_dict, PyString_FromString("gene_id"), PyString_FromString(geneId) );
+
    status = PyDict_SetItem(entry_dict, PyString_FromString("p_start"),    PyInt_FromLong(p_start) );
    status = PyDict_SetItem(entry_dict, PyString_FromString("exon_stop"),  PyInt_FromLong(exon_stop) );
    status = PyDict_SetItem(entry_dict, PyString_FromString("exon_start"), PyInt_FromLong(exon_start) );
@@ -88,6 +101,10 @@ static int set_item_from_line(PyObject *result_dict, const char* current_line) {
                PyErr_Warn(PyExc_Warning, "qparser.parse_reads: Failed to add item!");
    }
 
+   // decrement the reference count as we are finished with the local
+   // modification of the object
+   Py_DECREF( result_dict );
+
    return status;
 }
 
@@ -123,7 +140,7 @@ static PyObject * Py_parse_reads(PyObject *obj, PyObject *args) {
 
    off_t reads_filesize = reads_stat.st_size;                                                                                                                                                                                  
    printf("Reads file is of size %lu bytes\n",(unsigned long) reads_filesize);
-   int numReads = reads_filesize / 178.0;
+   //int numReads = reads_filesize / 178.0;
 
    // try to acquire file using mmap
    void *reads_area = mmap (NULL,reads_filesize,PROT_READ,MAP_PRIVATE,reads_fid,0);
@@ -150,7 +167,6 @@ static PyObject * Py_parse_reads(PyObject *obj, PyObject *args) {
    current_line[line_size] = '\0';
 
    int readCtr = 0;
-   int skippedLinesCounter = 0;
    int status = 0;
 
    // The result dict stores all lines in the form of small dictionaries
@@ -169,9 +185,10 @@ static PyObject * Py_parse_reads(PyObject *obj, PyObject *args) {
       while (*(char*)lineEndPtr != '\n' && lineEndPtr != end_of_mapped_area) lineEndPtr++;
       lineEndPtr++;
 
-      readCtr += 1;
       current_line = strncpy(current_line,lineBeginPtr,lineEndPtr-lineBeginPtr);
       current_line[lineEndPtr-lineBeginPtr] = '\0';
+
+      readCtr += 1;
    }
 
    // clean up
@@ -185,26 +202,32 @@ static PyObject * Py_parse_reads(PyObject *obj, PyObject *args) {
    free(chastity);
    free(geneId);
 
-   //Py_ssize_t PyDict_Size( PyObject *p)
+   Py_ssize_t result_size = PyDict_Size( result_dict );
+   
+   if ( result_size != readCtr )
+      printf("Error: size of dict does not equal number of reads!\n");
 
    return result_dict;
 }
 
+
 static PyMethodDef qparserMethods[] = {
        {"parse_reads",  Py_parse_reads, METH_VARARGS,"Test UInt8 behaviour."},
        {NULL, NULL, 0, NULL}        /* Sentinel */
 };
 
-PyMODINIT_FUNC initqparser(void) {
-       (void) Py_InitModule("qparser", qparserMethods);
 
+PyMODINIT_FUNC initqparser(void) {
    seq      = malloc(sizeof(char)*buffer_size);
    prb      = malloc(sizeof(char)*buffer_size);
    cal_prb  = malloc(sizeof(char)*buffer_size);
    chastity = malloc(sizeof(char)*buffer_size);
    geneId   = malloc(sizeof(char)*buffer_size);
+
+       (void) Py_InitModule("qparser", qparserMethods);
 }
 
+
 int main(int argc, char *argv[]) {
        Py_SetProgramName(argv[0]);
        Py_Initialize();
index cd8f430..b636474 100644 (file)
@@ -2,15 +2,31 @@
 # -*- coding: utf-8 -*- 
 
 import qparser
-import sys
-
+import pdb
 
+from qpalma.parsers import *
 
 def test_module():
-   filename = '/fml/ag-raetsch/share/projects/qpalma/solexa/new_run/allReads.full_20'
-   result = qparser.parse_reads(filename)
+   filename = 'allReads.full_20'
+   result_1 = qparser.parse_reads(filename)
+
+   result_2 = parse_filtered_reads(filename)
+
+   keys_1 = result_1.keys()
+   keys_2 = result_2.keys()
+
+   assert keys_1 == keys_2
+
+   
+   for key in keys_1:
+      elem_1 = result_1[key]
+      elem_2 = result_2[key]
+
+      pdb.set_trace()
 
-   pdb.set_trace()
+      if elem_1 != elem_2:
+         print elem_1
+         print elem_2
 
 if __name__ == '__main__':
    test_module()