+ modified module in order to act like a database returning a dictionary of the
authorfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Thu, 15 May 2008 16:37:57 +0000 (16:37 +0000)
committerfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Thu, 15 May 2008 16:37:57 +0000 (16:37 +0000)
line given a particular id

git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@9039 e1793c9e-67f9-0310-80fc-b846ff1f7b36

cparser/qparser.c
cparser/read.h [new file with mode: 0644]
cparser/test.py

index 0d03f79..3e52d16 100644 (file)
 #include <sys/mman.h>
 #include <sys/stat.h>
 
+
+#include "read.h"
+
 #define WITH_QUALITIES 0
 
+// Parallel arrays filled while parsing: read_array[i] holds the i-th stored
+// read and id_map[i] holds the id of that same read.
+unsigned long *id_map;
+
+// Reset to 0 when parsing starts; not otherwise used in the visible code.
+int map_idx;
+
+Read **read_array;
+
+// Number of entries currently stored in read_array and id_map.
+int num_reads;
+
+
 // the line format is defined as follows
 // id chr strand seq splitpos size q1 q2 q3 geneId p1 p2 p3 p4 true_cut
 const char* line_format = "%lu\t%d\t%c\t%s\t%d\t%d\t%s\t%s\t%s\t%s\t%d\t%d\t%d\t%d\t%d\n";
-int buffer_size= 64;                                                                                                                                                                                                        
-unsigned long id = 0;
-int chr        = 0;
-char strand    = ' ';
-int splitpos   = 0;
-int size       = 0;
-int p_start    = 0;
-int exon_stop  = 0;
-int exon_start = 0;
-int p_stop     = 0;
-int true_cut   = 0;
-
-char* seq      = 0;
-char* prb      = 0;
-char* cal_prb  = 0;
-char* chastity = 0;
-
-char* geneId   = 0;
 
+/*
+ * Parse one tab-separated line of the reads file into newRead.
+ *
+ * The columns are the ones listed for line_format:
+ * id chr strand seq splitpos size q1 q2 q3 geneId p1 p2 p3 p4 true_cut
+ *
+ * newRead must come from read_alloc()/init_read(): the buffers attached
+ * there are released and replaced here. After a complete parse the sequence
+ * letters 'A'..'T' are lower-cased, the three quality strings are shifted by
+ * -50, -64 and +10 respectively, and the strand codes 'D' and 'P' become
+ * '+' and '-'.
+ *
+ * Returns the number of fields sscanf converted (15 for a complete line),
+ * or -1 if a buffer could not be allocated.
+ */
+int create_read_from_line(Read* newRead, const char* current_line) {
 
-static int set_item_from_line(PyObject *result_dict, const char* current_line) {
-   
-   // increment the reference count for the result_dict object because we want
-   // ot modify it
-   Py_INCREF( result_dict );
+   // Buffer sizes: one byte more than the field widths in bounded_format,
+   // so a 72 (resp. 36) character field still has room for its terminator.
+   const size_t seq_buf   = 73;
+   const size_t field_buf = 37;
+
+   // Same layout as line_format, but every %s carries a width so that an
+   // over-long field makes the parse fail instead of overrunning its buffer.
+   const char* bounded_format = "%lu\t%d\t%c\t%72s\t%d\t%d\t%36s\t%36s\t%36s\t%36s\t%d\t%d\t%d\t%d\t%d\n";
+
+   // init_read() already attached buffers; release them so they do not leak.
+   free(newRead->seq);
+   free(newRead->prb);
+   free(newRead->cal_prb);
+   free(newRead->chastity);
+   free(newRead->gene_id);
 
-   //printf("current line is %s\n",current_line);
-   int entries_found = sscanf(current_line,line_format,&id,&chr,&strand,seq,&splitpos,&size,prb,cal_prb,chastity,geneId,&p_start,&exon_stop,&exon_start,&p_stop,&true_cut);
+   // calloc keeps every buffer a valid empty string if parsing stops early.
+   newRead->seq      = calloc(seq_buf, sizeof(char));
+   newRead->prb      = calloc(field_buf, sizeof(char));
+   newRead->cal_prb  = calloc(field_buf, sizeof(char));
+   newRead->chastity = calloc(field_buf, sizeof(char));
+   newRead->gene_id  = calloc(field_buf, sizeof(char));
+
+   if (newRead->seq == NULL || newRead->prb == NULL || newRead->cal_prb == NULL ||
+       newRead->chastity == NULL || newRead->gene_id == NULL)
+      return -1;
 
-   if (entries_found != 15) {
-      return entries_found;
+   int entries_found = sscanf(current_line,bounded_format,&(newRead->id),
+   &(newRead->chr),&(newRead->strand),newRead->seq,&(newRead->splitpos),&(newRead->size),
+   newRead->prb,newRead->cal_prb,newRead->chastity,newRead->gene_id,&(newRead->p_start),
+   &(newRead->exon_stop),&(newRead->exon_start),&(newRead->p_stop),&(newRead->true_cut));
+
+   // An incomplete line is rejected by the caller; do not post-process it.
+   if (entries_found != 15)
+      return entries_found;
+
+   size_t idx;
+   size_t seq_len = strlen(newRead->seq);
+   for(idx=0;idx<seq_len;idx++) {
+      // lower-case the letters 'A'..'T' (ASCII 65..84)
+      if ( 'A' <= newRead->seq[idx] && newRead->seq[idx] <= 'T')
+         newRead->seq[idx] = newRead->seq[idx]+32;
    }
 
-   //printf("after sscanf\n");
-   int status;
+   // Measure before shifting: a shifted value may be 0, which strlen would
+   // afterwards mistake for the end of the string.
+   size_t prb_len      = strlen(newRead->prb);
+   size_t cal_prb_len  = strlen(newRead->cal_prb);
+   size_t chastity_len = strlen(newRead->chastity);
+
+   for(idx=0;idx<prb_len;idx++)
+      newRead->prb[idx]       -= 50;
+
+   for(idx=0;idx<cal_prb_len;idx++)
+      newRead->cal_prb[idx]   -= 64;
+
+   for(idx=0;idx<chastity_len;idx++)
+      newRead->chastity[idx]  += 10;
 
-   // create dictionary representing one line
-   //PyObject* entry_dict = PyDict_New();
+   if ( newRead->strand == 'D' )
+      newRead->strand = '+';
 
-   // alternative way using a list instead of a dictionary
-   PyObject* entry_list = PyList_New(15);
-   Py_INCREF( entry_list );
+   if ( newRead->strand == 'P' )
+      newRead->strand = '-';
 
-   PyObject *id_py = PyInt_FromLong(id);
-   PyObject *strand_py = PyString_FromString("--");
+   return entries_found;
+}
 
-   if ( strand == 'D' )
-      strand_py = PyString_FromString("+");
 
-   if ( strand == 'P' )
-      strand_py = PyString_FromString("-");
+/*
+ * Python entry point "free_everything": release every stored read together
+ * with the read_array and id_map tables, then reset the globals so that a
+ * repeated call (or a call before any parse) is harmless.
+ *
+ * The signature is the PyCFunction one required by the METH_VARARGS entry
+ * in qparserMethods; both arguments are unused. Returns None.
+ */
+static PyObject * Py_free_everything(PyObject *obj, PyObject *args) {
 
-   //printf("before : %s\n",seq);
-   Py_ssize_t idx;
-   for(idx=0;idx<strlen(seq);idx++) {
-      if ( 65 <= seq[idx] && seq[idx] < 85)
-         seq[idx] = seq[idx]+32;
-   }
-   //printf("after : %s\n",seq);
+   int idx;
+   for(idx=0;idx<num_reads;idx++) {
+      free_read(read_array[idx]);
+      // free_read only releases the member buffers; the struct itself was
+      // malloc'ed by read_alloc and has to be freed separately.
+      free(read_array[idx]);
+   }
 
-   // add entries of that line
-   
-   //status = PyDict_SetItem(entry_dict, PyString_FromString("id"),         id_py );
-   //status = PyDict_SetItem(entry_dict, PyString_FromString("chr"),        PyInt_FromLong(chr) );
-   //status = PyDict_SetItem(entry_dict, PyString_FromString("seq"),        PyString_FromString(seq) );
-   //status = PyDict_SetItem(entry_dict, PyString_FromString("strand"),     strand_py );
-   //status = PyDict_SetItem(entry_dict, PyString_FromString("splitpos"),   PyInt_FromLong(splitpos) );
-   //status = PyDict_SetItem(entry_dict, PyString_FromString("read_size"),       PyInt_FromLong(size) );
-   //status = PyDict_SetItem(entry_dict, PyString_FromString("true_cut"),       PyInt_FromLong(true_cut) );
+   free(read_array);
+   free(id_map);
+
+   // Leave no dangling pointers behind for a later fetch or second free.
+   read_array = NULL;
+   id_map     = NULL;
+   num_reads  = 0;
+
+   Py_RETURN_NONE;
+}
 
-   status = PyList_SetItem(entry_list, 0,  id_py );
-   status = PyList_SetItem(entry_list, 1,  PyInt_FromLong(chr) );
-   status = PyList_SetItem(entry_list, 2,  PyString_FromString(seq) );
-   status = PyList_SetItem(entry_list, 3,  strand_py );
-   status = PyList_SetItem(entry_list, 4,  PyInt_FromLong(splitpos) );
-   status = PyList_SetItem(entry_list, 5,       PyInt_FromLong(size) );
-   status = PyList_SetItem(entry_list, 6,      PyInt_FromLong(true_cut) );
 
-   PyObject* prb_list = PyList_New(size);
-   PyObject* cal_prb_list = PyList_New(size);
-   PyObject* chastity_list = PyList_New(size);
+/*
+ * Parse current_line into a freshly allocated Read and append it to
+ * read_array / id_map.
+ *
+ * Returns 0 when the read was stored. If the line does not yield all 15
+ * fields the read is released again and a non-zero value is returned (the
+ * number of fields found, or -1 when that number is 0), so the caller's
+ * "status != 0" test detects every failure.
+ *
+ * NOTE(review): nothing here checks num_reads against the number of slots
+ * allocated for read_array / id_map - confirm that the estimate made in
+ * parse_reads is a safe upper bound.
+ */
+static int set_item_from_line(const char* current_line) {
+   printf("current line is %s\n",current_line);
 
-#if WITH_QUALITIES
-   for(idx=0;idx<size;idx++) {
-      status = PyList_SetItem( prb_list, idx, PyInt_FromLong(prb[idx]-50) );
-      status = PyList_SetItem( cal_prb_list, idx, PyInt_FromLong(cal_prb[idx]-64) );
-      status = PyList_SetItem( chastity_list, idx, PyInt_FromLong(chastity[idx]+10) );
+   Read* current_read = read_alloc();
+   int entries_found = create_read_from_line(current_read,current_line);
+
+   if (entries_found != 15) {
+      // the read never reaches read_array, so release it here
+      free_read(current_read);
+      free(current_read);
+      return entries_found != 0 ? entries_found : -1;
    }
 
-   status = PyList_SetItem(entry_list, 7,    prb_list );
-   status = PyList_SetItem(entry_list, 8,    cal_prb_list );
-   status = PyList_SetItem(entry_list, 9,    chastity_list );
-#else
-   status = PyList_SetItem(entry_list, 7,    PyString_FromString("") );
-   status = PyList_SetItem(entry_list, 8,    PyString_FromString("") );
-   status = PyList_SetItem(entry_list, 9,    PyString_FromString("") );
-#endif 
+   read_array[num_reads] = current_read;
+   id_map[num_reads] = current_read->id;
+   num_reads++;
+
+   return 0;
+}
 
 
-   status = PyList_SetItem(entry_list, 10, PyString_FromString(geneId) );
-   status = PyList_SetItem(entry_list, 11,    PyInt_FromLong(p_start) );
-   status = PyList_SetItem(entry_list, 12,  PyInt_FromLong(exon_stop) );
-   status = PyList_SetItem(entry_list, 13, PyInt_FromLong(exon_start) );
-   status = PyList_SetItem(entry_list, 14,     PyInt_FromLong(p_stop) );
 
-   //status = PyDict_SetItem(entry_dict, PyString_FromString("prb"),        prb_list );
-   //status = PyDict_SetItem(entry_dict, PyString_FromString("cal_prb"),    cal_prb_list );
-   //status = PyDict_SetItem(entry_dict, PyString_FromString("chastity"),   chastity_list );
-   //status = PyDict_SetItem(entry_dict, PyString_FromString("gene_id"), PyString_FromString(geneId) );
-   //status = PyDict_SetItem(entry_dict, PyString_FromString("p_start"),    PyInt_FromLong(p_start) );
-   //status = PyDict_SetItem(entry_dict, PyString_FromString("exon_stop"),  PyInt_FromLong(exon_stop) );
-   //status = PyDict_SetItem(entry_dict, PyString_FromString("exon_start"), PyInt_FromLong(exon_start) );
-   //status = PyDict_SetItem(entry_dict, PyString_FromString("p_stop"),     PyInt_FromLong(p_stop) );
+// Store value in dict under key and drop the caller's reference to value.
+// PyDict_SetItemString takes its own reference, so a freshly created value
+// would otherwise be leaked. Returns -1 if value is NULL or the insert fails.
+static int dict_set_steal(PyObject *dict, const char *key, PyObject *value) {
+   if (value == NULL)
+      return -1;
+
+   int status = PyDict_SetItemString(dict, key, value);
+   Py_DECREF(value);
+   return status;
+}
+
+/*
+ * Copy the fields of read_array[index] into read_dict under the keys
+ * id, chr, seq, strand, splitpos, read_size, true_cut, prb, cal_prb,
+ * chastity, gene_id, p_start, exon_stop, exon_start and p_stop.
+ *
+ * prb, cal_prb and chastity become lists holding the first `size` bytes of
+ * the corresponding buffers; strand becomes a one-character string.
+ *
+ * NOTE(review): `size` comes straight from the parsed line and is not
+ * checked against the length of the quality buffers - confirm upstream.
+ */
+static void assign_read(PyObject *read_dict, int index) {
 
-   // now save the dictionary representing one line in the dictionary
-   // representing the whole file
-   //status = PyDict_SetItem(result_dict, id_py, entry_dict);
+   Read* current_read = read_array[index];
 
-   status = PyDict_SetItem(result_dict, id_py, entry_list);
-   if (status != 0) {
-               PyErr_Warn(PyExc_Warning, "qparser.parse_reads: Failed to add item!");
+   dict_set_steal(read_dict, "id",         PyInt_FromLong(current_read->id) );
+   dict_set_steal(read_dict, "chr",        PyInt_FromLong(current_read->chr) );
+   dict_set_steal(read_dict, "seq",        PyString_FromString(current_read->seq) );
+   // strand is a single char, not a C string: build the string from its address
+   dict_set_steal(read_dict, "strand",     PyString_FromStringAndSize(&(current_read->strand), 1) );
+   dict_set_steal(read_dict, "splitpos",   PyInt_FromLong(current_read->splitpos) );
+   dict_set_steal(read_dict, "read_size",  PyInt_FromLong(current_read->size) );
+   dict_set_steal(read_dict, "true_cut",   PyInt_FromLong(current_read->true_cut) );
+
+   PyObject* prb_list = PyList_New(current_read->size);
+   PyObject* cal_prb_list = PyList_New(current_read->size);
+   PyObject* chastity_list = PyList_New(current_read->size);
+
+   // PyList_New fails for a negative size or when out of memory
+   if (prb_list == NULL || cal_prb_list == NULL || chastity_list == NULL) {
+      Py_XDECREF(prb_list);
+      Py_XDECREF(cal_prb_list);
+      Py_XDECREF(chastity_list);
+      return;
+   }
+
+   Py_ssize_t idx;
+   for(idx=0;idx<current_read->size;idx++) {
+      // PyList_SetItem takes over the reference to the new integer
+      PyList_SetItem( prb_list, idx, PyInt_FromLong(current_read->prb[idx]) );
+      PyList_SetItem( cal_prb_list, idx, PyInt_FromLong(current_read->cal_prb[idx]) );
+      PyList_SetItem( chastity_list, idx, PyInt_FromLong(current_read->chastity[idx]) );
    }
 
-   Py_DECREF( entry_list );
-   // decrement the reference count as we are finished with the local
-   // modification of the object
-   Py_DECREF( result_dict );
+   dict_set_steal(read_dict, "prb",        prb_list );
+   dict_set_steal(read_dict, "cal_prb",    cal_prb_list );
+   dict_set_steal(read_dict, "chastity",   chastity_list );
+   dict_set_steal(read_dict, "gene_id",    PyString_FromString(current_read->gene_id) );
+   dict_set_steal(read_dict, "p_start",    PyInt_FromLong(current_read->p_start) );
+   dict_set_steal(read_dict, "exon_stop",  PyInt_FromLong(current_read->exon_stop) );
+   dict_set_steal(read_dict, "exon_start", PyInt_FromLong(current_read->exon_start) );
+   dict_set_steal(read_dict, "p_stop",     PyInt_FromLong(current_read->p_stop) );
 
-   return status;
 }
 
+
+/*
+ * Python entry point: look up a stored read by id.
+ *
+ * Expects one integer argument, the read id. Returns a new dictionary
+ * filled by assign_read for every stored read whose id matches; the
+ * dictionary is empty when no read has that id. Returns NULL (with the
+ * Python error set) when the arguments cannot be parsed or the dictionary
+ * cannot be created.
+ */
+static PyObject * Py_fetch_read(PyObject *obj, PyObject *args) {
+   unsigned long read_id;
+
+   // "k" is the format unit for unsigned long; "i" would only write an int
+   // and leave the rest of read_id uninitialised.
+   if (!PyArg_ParseTuple(args, "k", &read_id)) {
+      PyErr_Warn(PyExc_Warning, "qparser.fetch_read: Invalid parameters.");
+      return NULL;
+   }
+
+   PyObject* read_dict = PyDict_New();
+   if (read_dict == NULL)
+      return NULL;
+
+   int idx;
+   for(idx=0;idx<num_reads;idx++) {
+      if (id_map[idx] == read_id) {
+         assign_read(read_dict,idx);
+      }
+   }
+
+   return read_dict;
+}
+
+
 /*
  * This function parses the original reads file and stores the lines in a
  * dictionary indexed by the key.
@@ -172,7 +182,15 @@ static PyObject * Py_parse_reads(PyObject *obj, PyObject *args) {
 
    off_t reads_filesize = reads_stat.st_size;                                                                                                                                                                                  
    printf("Reads file is of size %lu bytes\n",(unsigned long) reads_filesize);
-   //int numReads = reads_filesize / 178.0;
+
+   // ATTENTION this is an overestimator of the reads in the file
+   // it is NOT the exact number
+   int numReads = reads_filesize / 200.0;
+
+   read_array = malloc(sizeof(Read*)*numReads);
+   id_map = malloc(sizeof(unsigned long)*numReads);
+
+   //printf("Found %d reads.",numReads);
 
    // try to acquire file using mmap
    void *reads_area = mmap (NULL,reads_filesize,PROT_READ,MAP_PRIVATE,reads_fid,0);
@@ -201,15 +219,14 @@ static PyObject * Py_parse_reads(PyObject *obj, PyObject *args) {
    int readCtr = 0;
    int status = 0;
 
-   // The result dict stores all lines in the form of small dictionaries
-   // it is indexed by the unique id of the read.
-   PyObject* result_dict = PyDict_New();
+   num_reads = 0;
+   map_idx = 0;
 
    while(1) {
       if (strcmp(current_line,"") == 0) 
          break;
 
-      status = set_item_from_line(result_dict,current_line);
+      status = set_item_from_line(current_line);
       if (status != 0 )
          printf("Error while parsing line (status=%d).",status);
 
@@ -223,62 +240,23 @@ static PyObject * Py_parse_reads(PyObject *obj, PyObject *args) {
       readCtr += 1;
    }
 
-   // create a dictionary that maps attributes to their respective index in the
-   // list
-   PyObject* map_dict = PyDict_New();
-
-   status = PyDict_SetItem(map_dict, PyString_FromString("id"),         PyInt_FromLong(0) );
-   status = PyDict_SetItem(map_dict, PyString_FromString("chr"),        PyInt_FromLong(1) );
-   status = PyDict_SetItem(map_dict, PyString_FromString("seq"),        PyInt_FromLong(2) );
-   status = PyDict_SetItem(map_dict, PyString_FromString("strand"),     PyInt_FromLong(3) );
-   status = PyDict_SetItem(map_dict, PyString_FromString("splitpos"),   PyInt_FromLong(4) );
-   status = PyDict_SetItem(map_dict, PyString_FromString("read_size"),  PyInt_FromLong(5) );
-   status = PyDict_SetItem(map_dict, PyString_FromString("true_cut"),   PyInt_FromLong(6) );
-   status = PyDict_SetItem(map_dict, PyString_FromString("prb"),        PyInt_FromLong(7) );
-   status = PyDict_SetItem(map_dict, PyString_FromString("cal_prb"),    PyInt_FromLong(8) );
-   status = PyDict_SetItem(map_dict, PyString_FromString("chastity"),   PyInt_FromLong(9) );
-   status = PyDict_SetItem(map_dict, PyString_FromString("gene_id"),    PyInt_FromLong(10) );
-   status = PyDict_SetItem(map_dict, PyString_FromString("p_start"),    PyInt_FromLong(11) );
-   status = PyDict_SetItem(map_dict, PyString_FromString("exon_stop"),  PyInt_FromLong(12) );
-   status = PyDict_SetItem(map_dict, PyString_FromString("exon_start"), PyInt_FromLong(13) );
-   status = PyDict_SetItem(map_dict, PyString_FromString("p_stop"),     PyInt_FromLong(14) );
-
-   // create result tuple
-   PyObject *result_tuple = PyTuple_Pack( 2, map_dict, result_dict );
-
    // clean up
    status = munmap(reads_area,reads_filesize);                                                                                                                                                                                 
    if(status != 0)
       perror("munmap");
 
-   free(seq);
-   free(prb);
-   free(cal_prb);
-   free(chastity);
-   free(geneId);
-
-   Py_ssize_t result_size = PyDict_Size( result_dict );
-   
-   if ( result_size != readCtr )
-      printf("Error: size of dict does not equal number of reads!\n");
-
-   return result_tuple;
+   return PyInt_FromLong(0);
 }
 
 
 static PyMethodDef qparserMethods[] = {
        {"parse_reads",  Py_parse_reads, METH_VARARGS,"Test UInt8 behaviour."},
+       {"free_everything",  Py_free_everything, METH_VARARGS,"Test UInt8 behaviour."},
        {NULL, NULL, 0, NULL}        /* Sentinel */
 };
 
 
+// Module initialisation: registers the functions listed in qparserMethods
+// under the module name "qparser". The result of Py_InitModule is discarded.
 PyMODINIT_FUNC initqparser(void) {
-   seq      = malloc(sizeof(char)*buffer_size);
-   prb      = malloc(sizeof(char)*buffer_size);
-   cal_prb  = malloc(sizeof(char)*buffer_size);
-   chastity = malloc(sizeof(char)*buffer_size);
-   geneId   = malloc(sizeof(char)*buffer_size);
-
        (void) Py_InitModule("qparser", qparserMethods);
 }
 
@@ -288,3 +266,117 @@ int main(int argc, char *argv[]) {
        Py_Initialize();
        initqparser();
 }
+
+
+/*
+static int set_item_from_line(PyObject *result_dict, const char* current_line) {
+   
+   // increment the reference count for the result_dict object because we want
+   // to modify it
+   Py_INCREF( result_dict );
+
+   //printf("current line is %s\n",current_line);
+   int entries_found = sscanf(current_line,line_format,&id,&chr,&strand,seq,&splitpos,&size,prb,cal_prb,chastity,geneId,&p_start,&exon_stop,&exon_start,&p_stop,&true_cut);
+
+   if (entries_found != 15) {
+      return entries_found;
+   }
+
+   //printf("after sscanf\n");
+   int status;
+
+   // create dictionary representing one line
+   //PyObject* entry_dict = PyDict_New();
+
+   // alternative way using a list instead of a dictionary
+   PyObject* entry_list = PyList_New(15);
+   Py_INCREF( entry_list );
+
+   PyObject *id_py = PyInt_FromLong(id);
+   PyObject *strand_py = PyString_FromString("--");
+
+   if ( strand == 'D' )
+      strand_py = PyString_FromString("+");
+
+   if ( strand == 'P' )
+      strand_py = PyString_FromString("-");
+
+   //printf("before : %s\n",seq);
+   Py_ssize_t idx;
+   for(idx=0;idx<strlen(seq);idx++) {
+      if ( 65 <= seq[idx] && seq[idx] < 85)
+         seq[idx] = seq[idx]+32;
+   }
+   //printf("after : %s\n",seq);
+
+   // add entries of that line
+   
+   //status = PyDict_SetItem(entry_dict, PyString_FromString("id"),         id_py );
+   //status = PyDict_SetItem(entry_dict, PyString_FromString("chr"),        PyInt_FromLong(chr) );
+   //status = PyDict_SetItem(entry_dict, PyString_FromString("seq"),        PyString_FromString(seq) );
+   //status = PyDict_SetItem(entry_dict, PyString_FromString("strand"),     strand_py );
+   //status = PyDict_SetItem(entry_dict, PyString_FromString("splitpos"),   PyInt_FromLong(splitpos) );
+   //status = PyDict_SetItem(entry_dict, PyString_FromString("read_size"),       PyInt_FromLong(size) );
+   //status = PyDict_SetItem(entry_dict, PyString_FromString("true_cut"),       PyInt_FromLong(true_cut) );
+
+   status = PyList_SetItem(entry_list, 0,  id_py );
+   status = PyList_SetItem(entry_list, 1,  PyInt_FromLong(chr) );
+   status = PyList_SetItem(entry_list, 2,  PyString_FromString(seq) );
+   status = PyList_SetItem(entry_list, 3,  strand_py );
+   status = PyList_SetItem(entry_list, 4,  PyInt_FromLong(splitpos) );
+   status = PyList_SetItem(entry_list, 5,       PyInt_FromLong(size) );
+   status = PyList_SetItem(entry_list, 6,      PyInt_FromLong(true_cut) );
+
+   PyObject* prb_list = PyList_New(size);
+   PyObject* cal_prb_list = PyList_New(size);
+   PyObject* chastity_list = PyList_New(size);
+
+#if WITH_QUALITIES
+   for(idx=0;idx<size;idx++) {
+      status = PyList_SetItem( prb_list, idx, PyInt_FromLong(prb[idx]-50) );
+      status = PyList_SetItem( cal_prb_list, idx, PyInt_FromLong(cal_prb[idx]-64) );
+      status = PyList_SetItem( chastity_list, idx, PyInt_FromLong(chastity[idx]+10) );
+   }
+
+   status = PyList_SetItem(entry_list, 7,    prb_list );
+   status = PyList_SetItem(entry_list, 8,    cal_prb_list );
+   status = PyList_SetItem(entry_list, 9,    chastity_list );
+#else
+   status = PyList_SetItem(entry_list, 7,    PyString_FromString("") );
+   status = PyList_SetItem(entry_list, 8,    PyString_FromString("") );
+   status = PyList_SetItem(entry_list, 9,    PyString_FromString("") );
+#endif 
+
+
+   status = PyList_SetItem(entry_list, 10, PyString_FromString(geneId) );
+   status = PyList_SetItem(entry_list, 11,    PyInt_FromLong(p_start) );
+   status = PyList_SetItem(entry_list, 12,  PyInt_FromLong(exon_stop) );
+   status = PyList_SetItem(entry_list, 13, PyInt_FromLong(exon_start) );
+   status = PyList_SetItem(entry_list, 14,     PyInt_FromLong(p_stop) );
+
+   //status = PyDict_SetItem(entry_dict, PyString_FromString("prb"),        prb_list );
+   //status = PyDict_SetItem(entry_dict, PyString_FromString("cal_prb"),    cal_prb_list );
+   //status = PyDict_SetItem(entry_dict, PyString_FromString("chastity"),   chastity_list );
+   //status = PyDict_SetItem(entry_dict, PyString_FromString("gene_id"), PyString_FromString(geneId) );
+   //status = PyDict_SetItem(entry_dict, PyString_FromString("p_start"),    PyInt_FromLong(p_start) );
+   //status = PyDict_SetItem(entry_dict, PyString_FromString("exon_stop"),  PyInt_FromLong(exon_stop) );
+   //status = PyDict_SetItem(entry_dict, PyString_FromString("exon_start"), PyInt_FromLong(exon_start) );
+   //status = PyDict_SetItem(entry_dict, PyString_FromString("p_stop"),     PyInt_FromLong(p_stop) );
+
+   // now save the dictionary representing one line in the dictionary
+   // representing the whole file
+   //status = PyDict_SetItem(result_dict, id_py, entry_dict);
+
+   status = PyDict_SetItem(result_dict, id_py, entry_list);
+   if (status != 0) {
+               PyErr_Warn(PyExc_Warning, "qparser.parse_reads: Failed to add item!");
+   }
+
+   Py_DECREF( entry_list );
+   // decrement the reference count as we are finished with the local
+   // modification of the object
+   Py_DECREF( result_dict );
+
+   return status;
+}
+*/
diff --git a/cparser/read.h b/cparser/read.h
new file mode 100644 (file)
index 0000000..318364e
--- /dev/null
@@ -0,0 +1,104 @@
+#ifndef __READ_H__
+#define __READ_H__
+
+// One record of the reads file. The five char* members point to heap
+// buffers that free_read() releases.
+typedef struct read {
+   // identifier of the read
+   unsigned long id;
+
+   int chr;
+   // a single character; init_read sets it to '*'
+   char strand;
+   int splitpos;
+   // create_read allocates `size` bytes for each of prb, cal_prb and chastity
+   int size;
+   int p_start;
+   int exon_stop;
+   int exon_start;
+   int p_stop;
+   int true_cut;
+
+   // sequence string
+   char* seq;
+   // three quality buffers
+   char* prb;
+   char* cal_prb;
+   char* chastity;
+
+   char* gene_id;
+} Read;
+
+
+// Reset every scalar field of r (strand becomes '*', all numbers 0) and
+// attach five fresh 64-byte buffers for seq, prb, cal_prb, chastity and
+// gene_id. The buffers are zero-filled, so each one starts out as a valid
+// empty string. Allocation failures are caught with assert, as in read_alloc.
+// Any buffers r pointed to before the call are not released here.
+void init_read(Read *r) {
+   int buffer_size = 64;
+
+   r->id          = 0;
+   r->chr         = 0;
+   r->strand      = '*';
+   r->splitpos    = 0;
+   r->size        = 0;
+   r->p_start     = 0;
+   r->exon_stop   = 0;
+   r->exon_start  = 0;
+   r->p_stop      = 0;
+   r->true_cut    = 0;
+
+   r->seq      = calloc(buffer_size, sizeof(char));
+   r->prb      = calloc(buffer_size, sizeof(char));
+   r->cal_prb  = calloc(buffer_size, sizeof(char));
+   r->chastity = calloc(buffer_size, sizeof(char));
+   r->gene_id  = calloc(buffer_size, sizeof(char));
+
+   assert( r->seq != NULL );
+   assert( r->prb != NULL );
+   assert( r->cal_prb != NULL );
+   assert( r->chastity != NULL );
+   assert( r->gene_id != NULL );
+}
+
+
+// Allocate a Read on the heap, run init_read on it and return it.
+// The allocation itself is only guarded by an assert.
+Read* read_alloc() {
+   Read* fresh = malloc(sizeof *fresh);
+   assert( fresh != NULL );
+
+   init_read(fresh);
+   return fresh;
+}
+
+
+// Release the five buffers owned by oldRead (seq, prb, cal_prb, chastity,
+// gene_id) and reset the pointers to NULL, so that calling free_read twice
+// on the same Read is harmless. The Read structure itself is NOT freed.
+// A NULL argument is ignored.
+void free_read(Read* oldRead) {
+   if (oldRead == NULL)
+      return;
+
+   // free(NULL) is a no-op, so no per-pointer checks are needed
+   free(oldRead->seq);
+   free(oldRead->prb);
+   free(oldRead->cal_prb);
+   free(oldRead->chastity);
+   free(oldRead->gene_id);
+
+   oldRead->seq      = NULL;
+   oldRead->prb      = NULL;
+   oldRead->cal_prb  = NULL;
+   oldRead->chastity = NULL;
+   oldRead->gene_id  = NULL;
+}
+
+
+// Fill newRead from the given values. The scalar arguments are stored as
+// they are; seq, prb, cal_prb, chastity and gene_id are copied into newly
+// allocated buffers owned by newRead (release them with free_read).
+//
+// Every buffer gets one extra zero byte, so seq and gene_id are proper
+// C strings. At most `size` bytes are copied from each of prb, cal_prb and
+// chastity; a negative size is treated as 0 for the copies.
+// Allocation failures are caught with assert.
+//
+// Buffers newRead already points to are overwritten, not released -
+// call free_read first if the Read was initialised with init_read.
+void create_read(Read* newRead, unsigned long id, int chr, char strand, char* seq, int splitpos, int size, char* prb, char* cal_prb, char* chastity, char* gene_id, int p_start, int exon_stop, int exon_start, int p_stop, int true_cut) {
+
+   size_t seq_len     = strlen(seq);
+   size_t gene_id_len = strlen(gene_id);
+   // a negative size must not be converted into a huge allocation request
+   size_t qual_len    = size > 0 ? (size_t) size : 0;
+
+   newRead->id          = id;
+   newRead->chr         = chr;
+   newRead->strand      = strand;
+   newRead->splitpos    = splitpos;
+   newRead->size        = size;
+
+   newRead->p_start     = p_start;
+   newRead->exon_stop   = exon_stop;
+   newRead->exon_start  = exon_start;
+   newRead->p_stop      = p_stop;
+
+   newRead->true_cut    = true_cut;
+
+   // calloc zero-fills, which provides the terminating byte after each copy
+   newRead->seq      = calloc(seq_len + 1, sizeof(char));
+   newRead->prb      = calloc(qual_len + 1, sizeof(char));
+   newRead->cal_prb  = calloc(qual_len + 1, sizeof(char));
+   newRead->chastity = calloc(qual_len + 1, sizeof(char));
+   newRead->gene_id  = calloc(gene_id_len + 1, sizeof(char));
+
+   assert( newRead->seq != NULL );
+   assert( newRead->prb != NULL );
+   assert( newRead->cal_prb != NULL );
+   assert( newRead->chastity != NULL );
+   assert( newRead->gene_id != NULL );
+
+   memcpy(newRead->seq,seq,seq_len);
+   strncpy(newRead->prb,prb,qual_len);
+   strncpy(newRead->cal_prb,cal_prb,qual_len);
+   strncpy(newRead->chastity,chastity,qual_len);
+   memcpy(newRead->gene_id,gene_id,gene_id_len);
+}
+
+
+
+#endif // __READ_H__
index 44bbc54..abbaf87 100644 (file)
@@ -13,6 +13,17 @@ def cpu():
 
 
 def test_module():
+   # Parse a small reads file through the qparser extension and print the
+   # value that parse_reads returns.
+   # NOTE(review): the C side shown in this change ends parse_reads with
+   # PyInt_FromLong(0), so the printed "num reads" would always be 0 - confirm.
+   filename = 'allReads.full_20'
+   #filename = '/fml/ag-raetsch/share/projects/qpalma/solexa/new_run/allReads.full_10k'
+
+   # NOTE(review): start/stop are taken around the call but are not used in
+   # the visible part of this function.
+   start = cpu()
+   num_reads = qparser.parse_reads(filename)
+   stop = cpu()
+
+   print 'found %d num reads' % num_reads
+
+
+def _test_module():
    filename = '/fml/ag-raetsch/share/projects/qpalma/solexa/new_run/allReads.full_10k'
 
    start = cpu()