+ found string splitting bug
authorfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Mon, 2 Jun 2008 12:18:51 +0000 (12:18 +0000)
committerfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Mon, 2 Jun 2008 12:18:51 +0000 (12:18 +0000)
git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@9330 e1793c9e-67f9-0310-80fc-b846ff1f7b36

ParaParser/.ParaParser.cpp.swp
ParaParser/Makefile
ParaParser/ParaParser.cpp
ParaParser/ParaParser.h
ParaParser/simple_example.py

index b41abc9..e92973b 100644 (file)
Binary files a/ParaParser/.ParaParser.cpp.swp and b/ParaParser/.ParaParser.cpp.swp differ
index 9229fe7..b03775a 100644 (file)
@@ -2,7 +2,7 @@ PROJ=ParaParser
 
 #CXXFLAGS=-O3 -fPIC
 #CXXFLAGS=-O3 -fPIC -pg -fprofile-arcs
-CXXFLAGS=-O3 -ggdb -fPIC -I/usr/include/python2.5
+CXXFLAGS=-Wall -O3 -ggdb -fPIC -I/usr/include/python2.5
 
 SRCS= ParaParser.cpp
 
index 728d99d..6d019a5 100644 (file)
@@ -64,7 +64,8 @@ unsigned split_args(char *args, char *** const argv_ptr, const char *delim)
  */
 
 ParaParser::ParaParser(const char* fmt, char** _fields, int num_entries) {
-   num_columns = num_entries;
+   assert(num_entries>0);
+   num_columns = (size_t) num_entries;
    size_t buf_size = 512;
    format_string = (char*) malloc(sizeof(char)*buf_size);
    if (strlen(fmt) > buf_size)
@@ -73,8 +74,8 @@ ParaParser::ParaParser(const char* fmt, char** _fields, int num_entries) {
    strncpy(format_string,fmt,strlen(fmt));
 
    // count how many entries are parsed in one line (number of %'s)
-   int format_num_entries = 0;
-   for(int fidx=0;fidx<strlen(fmt);fidx++)
+   size_t format_num_entries = 0;
+   for(size_t fidx=0;fidx<strlen(fmt);fidx++)
       if (format_string[fidx] == '%')
          format_num_entries++;
 
@@ -83,7 +84,7 @@ ParaParser::ParaParser(const char* fmt, char** _fields, int num_entries) {
 
    field_names = (char**) malloc(sizeof(char*)*num_columns);
 
-   for(int idx=0;idx<num_columns;idx++) {
+   for(size_t idx=0;idx<num_columns;idx++) {
       field_names[idx] = (char*) malloc(sizeof(char)*buf_size);
       strncpy(field_names[idx],_fields[idx],strlen(_fields[idx]));
    }
@@ -92,7 +93,7 @@ ParaParser::ParaParser(const char* fmt, char** _fields, int num_entries) {
    char* pruned_format_string = (char*)malloc(sizeof(char*)*strlen(format_string)-num_columns+2);
    int pruned_idx = 0;
 
-   for(int idx=0;idx<strlen(format_string);idx++) {
+   for(size_t idx=0;idx<strlen(format_string);idx++) {
       if (format_string[idx] == '\t')
          continue;
 
@@ -104,7 +105,7 @@ ParaParser::ParaParser(const char* fmt, char** _fields, int num_entries) {
    types_list = (char**) malloc(sizeof(char**)*num_columns);
    printf("types list\n");
    char *pruned_ptr = pruned_format_string;
-   for(int f_idx=0;f_idx<num_columns;f_idx++) {
+   for(size_t f_idx=0;f_idx<num_columns;f_idx++) {
       char *part = strtok (pruned_ptr, "%");
       pruned_ptr = NULL;
 
@@ -124,54 +125,30 @@ void ParaParser::create_entry_from_line(const char* current_line, char* format_s
    printf("current line is %s",current_line);
    
    // create an array of void ptrs
-   void **vptr_array = (void**) malloc(sizeof(void*)*num_columns);
+   char **current_entries = (char**) malloc(sizeof(char*)*num_columns);
 
    char* mutable_line = (char*) malloc(sizeof(char)*strlen(current_line));
    strncpy(mutable_line,current_line,strlen(current_line));
+   mutable_line[strlen(current_line)-1] = '\t';
 
    char** line_parts;
-   int num_parts = split_args(mutable_line,&line_parts,"\t");
-
-   assert(num_parts == num_columns);
+   size_t num_parts = split_args(mutable_line,&line_parts,"\t");
 
    printf("size of map %d\n",entries->size());
 
-   for(int idx=0; idx<num_columns;idx++) {
+   for(size_t idx=0; idx<num_columns;idx++) {
       char* col = line_parts[idx];
       char* current_type = types_list[idx];
 
       printf("elem:%s\n",col);
-      
-      if ( strcmp(current_type,"d")==0 ) {
-         printf("found int\n");
-         vptr_array[idx] = (int*) malloc(sizeof(int));
-         int elem = atoi(col);
-         memcpy(vptr_array[idx],&elem,sizeof(int));
-      }
-
-      if ( strcmp(current_type,"f")==0 ) {
-         printf("found double\n");
-         vptr_array[idx] = (double*) malloc(sizeof(double));
-         double elem = atof(col);
-         memcpy(vptr_array[idx],&elem,sizeof(double));
-      }
-
-      if ( strcmp(current_type,"s")==0 ) {
-         printf("found string\n");
-         vptr_array[idx] = (char*) malloc(sizeof(char)*strlen(col));
-         memcpy(vptr_array[idx],col,strlen(col));
-      }
-
-      if ( strcmp(current_type,"lu")==0 ) {
-         printf("found unsigned long\n");
-         vptr_array[idx] = (unsigned long*) malloc(sizeof(unsigned long));
-         unsigned long elem = strtoul(col,NULL,10);
-         memcpy(vptr_array[idx],&elem,sizeof(unsigned long));
-      }
+      current_entries[idx] = (char*) malloc(sizeof(char)*strlen(col)+1);
+      strncpy(current_entries[idx],col,strlen(col));
+      current_entries[idx][strlen(col)] = '\0';
    }
 
-   int *id = (int*) vptr_array[0];
-   (*entries)[*id] = vptr_array;
+   int id = atoi(current_entries[0]);
+   printf("id is %d\n",id);
+   (*entries)[id] = (void**) current_entries;
    printf("size of map %d\n",entries->size());
 
    free(mutable_line);
@@ -241,8 +218,6 @@ int ParaParser::parseFile(char* reads_filename) {
    int readCtr = 0;
    int status = 0;
 
-   int num_reads = 0;
-
    while(1) {
       if (strcmp(current_line,"") == 0) 
          break;
@@ -287,45 +262,49 @@ PyObject* ParaParser::fetchEntry(int id) {
    printf("begin of fetchEntry\n");
    PyObject* line_dict = PyDict_New();
 
-
    printf("size of map %d\n",entries->size());
 
    map<unsigned long,void**,KeyCmp>::iterator find_it = entries->find((unsigned long)id);
    if( find_it == entries->end() )
       return line_dict;
 
-   void** current_entry = (*entries)[id];
-
-   int* _id = (int*) current_entry[0];
-   printf("id is %d\n",*_id);
+   char** current_entry = (char**) (*entries)[id];
 
+   int _id = atoi(current_entry[0]);
+   printf("id is %d\n",_id);
 
    int status;
+      
+   for(size_t idx=0;idx<num_columns;idx++) {
+      char* current_type = types_list[idx];
+      char* current = current_entry[idx];
 
-   PyObject* elem;
+      // init elem to make compiler happy
+      PyObject* elem = 0;
 
-   for(int idx=0;idx<num_columns;idx++) {
-      char* current_type = types_list[idx];
       if ( strcmp(current_type,"d")==0 )
-         elem = PyInt_FromLong(*(int*)current_entry[idx]);
+         elem = PyInt_FromString(current,NULL,10);
 
       if ( strcmp(current_type,"f")==0 )
-         elem = PyFloat_FromDouble(*(double*)current_entry[idx]);
+         elem = PyFloat_FromString(PyString_FromString(current),NULL);
 
       if ( strcmp(current_type,"s")==0 )
-         elem = PyString_FromString((char*)current_entry[idx]);
+         elem = PyString_FromString(current);
 
-      if ( strcmp(current_type,"lu")==0 || strcmp(current_type,"d")==0 )
-         elem = PyLong_FromUnsignedLong(*(int*)current_entry[idx]);
+      if ( strcmp(current_type,"lu")==0 )
+         elem = PyLong_FromString(current,NULL,10);
          
+      if (elem == 0)
+         printf("Error: type %s/ elem %s\n",current_type,current);
+      
       printf("add item\n");
+
       status = PyDict_SetItem(line_dict, PyString_FromString(field_names[idx]), elem);
    }
    
    /*
-    *
-   //size_t idx;
-   //for(idx=0;idx<current_read->size;idx++) {
+   //;
+   //for(size_t idx=0;idx<current_read->size;idx++) {
    //   status = PyList_SetItem( prb_list, idx, PyInt_FromLong(current_read->prb[idx]-50) );
    //   status = PyList_SetItem( cal_prb_list, idx, PyInt_FromLong(current_read->cal_prb[idx]-64) );
    //   status = PyList_SetItem( chastity_list, idx, PyInt_FromLong(current_read->chastity[idx]+10) );
@@ -336,4 +315,3 @@ PyObject* ParaParser::fetchEntry(int id) {
    printf("end of fetchEntry\n");
    return line_dict;
 }
-
index 2f464b3..2a5ba00 100644 (file)
@@ -17,7 +17,7 @@ class ParaParser{
    private:
       char* format_string;
       char** field_names;
-      int num_columns;
+      size_t num_columns;
       char** types_list;
 
       map<unsigned long,void**,KeyCmp> *entries;
index 8b451b9..1d018b5 100644 (file)
@@ -8,11 +8,11 @@ def run(file):
    parser = ParaParser("%d%s%s%d%d",["field0","field1","field2","field3","field4"],5)
    parser.parseFile(file)
    print "After parseFile call..."
-   entry1_dict = parser.fetchEntry(101)
+   entry1_dict = parser.fetchEntry(1111)
    print "Fetching entry..."
    print entry1_dict
-   #entry2_dict = parser.fetchEntry(102)
-   #print entry2_dict
+   entry2_dict = parser.fetchEntry(2222)
+   print entry2_dict
 
 if __name__ == '__main__':
    run('test.data')