+ rewrote some data storage code
author fabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Mon, 2 Jun 2008 11:44:41 +0000 (11:44 +0000)
committer fabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Mon, 2 Jun 2008 11:44:41 +0000 (11:44 +0000)
+ still have to find pointer-to-entry related problem

git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@9329 e1793c9e-67f9-0310-80fc-b846ff1f7b36

ParaParser/.ParaParser.cpp.swp
ParaParser/Makefile
ParaParser/ParaParser.cpp
ParaParser/ParaParser.h
ParaParser/simple_example.py
ParaParser/test.data

index 35e9c73..b41abc9 100644 (file)
Binary files a/ParaParser/.ParaParser.cpp.swp and b/ParaParser/.ParaParser.cpp.swp differ
index b67d52a..9229fe7 100644 (file)
@@ -1,12 +1,12 @@
-SRCS= ParaParser.cpp
-
-OBJS = $(SRCS:%.cpp=%.o)
+PROJ=ParaParser
 
 #CXXFLAGS=-O3 -fPIC
 #CXXFLAGS=-O3 -fPIC -pg -fprofile-arcs
-CXXFLAGS=-O3 -fPIC -I/usr/include/python2.5
+CXXFLAGS=-O3 -ggdb -fPIC -I/usr/include/python2.5
 
-PROJ=ParaParser
+SRCS= ParaParser.cpp
+
+OBJS = $(SRCS:%.cpp=%.o)
 
 all: $(OBJS)
        swig -c++ -python ${PROJ}.i
index 22dee7d..728d99d 100644 (file)
@@ -4,6 +4,59 @@
 #include <sys/mman.h>
 #include <sys/stat.h>
 
+
+/**
+ * Split string and return pointers to its parts.
+ *
+ * \param args The string to be split.
+ * \param argv_ptr Pointer to the list of substrings.
+ * \param delim Delimiter.
+ *
+ * This function modifies \a args by replacing each occurrence of \a delim by
+ * zero. A \p NULL-terminated array of pointers to char* is allocated dynamically
+ * and these pointers are initialized to point to the broken-up substrings
+ * within \a args. A pointer to this array is returned via \a argv_ptr.
+ *
+ * \return The number of substrings found in \a args.
+ */
+
+unsigned split_args(char *args, char *** const argv_ptr, const char *delim) // delim is used as a set of characters (strspn/strcspn semantics)
+{
+   char *p = args;
+   char **argv;
+   size_t n = 0, i, j;
+
+   p = args + strspn(args, delim); // skip any leading delimiter characters
+   for (;;) { // pass 1: count the substrings; args is not modified here
+      i = strcspn(p, delim); // length of the substring starting at p
+      if (!i) // zero length: p is at the terminating '\0', nothing left to count
+         break;
+      p += i;
+      n++;
+      p += strspn(p, delim); // skip the run of delimiters after the substring
+   }
+   *argv_ptr = (char**) malloc((n + 1) * sizeof(char *)); // n pointers plus one NULL slot; result is not checked and is not freed in this function
+   argv = *argv_ptr;
+   i = 0;
+   p = args + strspn(args, delim); // rewind to the first substring for pass 2
+   while (p) { // p is never NULL here; the loop is left only through the break below
+      argv[i] = p; // on the final iteration i == n; that slot is overwritten with NULL after the loop
+      j = strcspn(p, delim);
+      if (!j) // reached the end of the string
+         break;
+      p += strcspn(p, delim); // same value as j, computed a second time
+      if (*p) { // stopped on a delimiter rather than on the end of the string
+         *p = '\0'; // terminate the current substring in place
+         p++;
+         p += strspn(p, delim); // skip any further delimiters
+      }
+      i++;
+   }
+   argv[n] = NULL; // NULL-terminate the pointer array
+   return n; // size_t count is converted to the unsigned return type
+}
+
+
 /*
  * The constructor needs the format string to be used with sscanf and the names
  * of the respective fields for the dictionary.
@@ -11,6 +64,7 @@
  */
 
 ParaParser::ParaParser(const char* fmt, char** _fields, int num_entries) {
+   num_columns = num_entries;
    size_t buf_size = 512;
    format_string = (char*) malloc(sizeof(char)*buf_size);
    if (strlen(fmt) > buf_size)
@@ -24,18 +78,18 @@ ParaParser::ParaParser(const char* fmt, char** _fields, int num_entries) {
       if (format_string[fidx] == '%')
          format_num_entries++;
 
-   if (format_num_entries != num_entries) 
+   if (format_num_entries != num_columns) 
       printf("Error: For every entry in the format string you have to supply a name!");
 
-   field_names = (char**) malloc(sizeof(char*)*num_entries);
+   field_names = (char**) malloc(sizeof(char*)*num_columns);
 
-   for(int idx=0;idx<num_entries;idx++) {
+   for(int idx=0;idx<num_columns;idx++) {
       field_names[idx] = (char*) malloc(sizeof(char)*buf_size);
       strncpy(field_names[idx],_fields[idx],strlen(_fields[idx]));
    }
 
    // subtract number of tabs as we don't want them in the parsing
-   char* pruned_format_string = (char*)malloc(sizeof(char*)*strlen(format_string)-num_entries+2);
+   char* pruned_format_string = (char*)malloc(sizeof(char*)*strlen(format_string)-num_columns+2);
    int pruned_idx = 0;
 
    for(int idx=0;idx<strlen(format_string);idx++) {
@@ -45,16 +99,17 @@ ParaParser::ParaParser(const char* fmt, char** _fields, int num_entries) {
       pruned_format_string[pruned_idx] = format_string[idx];
       pruned_idx++;
    }
-   pruned_format_string[strlen(format_string)-num_entries+1] = '%';
+   pruned_format_string[strlen(format_string)-num_columns+1] = '%';
 
-   types_list = (char**) malloc(sizeof(char**)*num_entries);
+   types_list = (char**) malloc(sizeof(char**)*num_columns);
    printf("types list\n");
    char *pruned_ptr = pruned_format_string;
-   for(int f_idx=0;f_idx<num_entries;f_idx++) {
+   for(int f_idx=0;f_idx<num_columns;f_idx++) {
       char *part = strtok (pruned_ptr, "%");
       pruned_ptr = NULL;
 
-      types_list[f_idx] = (char*) malloc(sizeof(char*)*strlen(part));
+      types_list[f_idx] = (char*) malloc(sizeof(char*)*strlen(part)+1);
+      types_list[f_idx][strlen(part)] = '\0';
       strncpy(types_list[f_idx],part,strlen(part));
       printf("%s(%d) ",part,strlen(part));
    }
@@ -65,50 +120,65 @@ ParaParser::ParaParser(const char* fmt, char** _fields, int num_entries) {
  * 
  */
 
-void ParaParser::create_entry_from_line(const char* current_line, char* format_string, ... ) {
-   printf("current line is %s\n",current_line);
+void ParaParser::create_entry_from_line(const char* current_line, char* format_string) {
+   printf("current line is %s",current_line);
    
-   void*** current_entry = (void***) malloc(sizeof(void***));
+   // create an array of void ptrs
+   void **vptr_array = (void**) malloc(sizeof(void*)*num_columns);
 
-   char *line_ptr = (char*) malloc(sizeof(char)*strlen(current_line));
-   strncpy(line_ptr,current_line,strlen(current_line));
+   char* mutable_line = (char*) malloc(sizeof(char)*strlen(current_line));
+   strncpy(mutable_line,current_line,strlen(current_line));
 
-   char* current_type = (char*) malloc(sizeof(char)*6);
+   char** line_parts;
+   int num_parts = split_args(mutable_line,&line_parts,"\t");
 
-   for(int idx=0; idx<num_columns;idx++) {
-      char* col = strtok(line_ptr,"\t");
-      line_ptr = NULL;
-      strncpy(current_type,types_list[idx],strlen(types_list[idx]));
+   assert(num_parts == num_columns);
+
+   printf("size of map %d\n",entries->size());
 
-      printf("%s ",col);
-      printf("%s ",current_type);
+   for(int idx=0; idx<num_columns;idx++) {
+      char* col = line_parts[idx];
+      char* current_type = types_list[idx];
 
+      printf("elem:%s\n",col);
+      
       if ( strcmp(current_type,"d")==0 ) {
-         (*current_entry)[idx] = (int*) malloc(sizeof(int));
-         memcpy((*current_entry)[idx],col,sizeof(int*));
+         printf("found int\n");
+         vptr_array[idx] = (int*) malloc(sizeof(int));
+         int elem = atoi(col);
+         memcpy(vptr_array[idx],&elem,sizeof(int));
       }
+
       if ( strcmp(current_type,"f")==0 ) {
-         (*current_entry)[idx] = (double*) malloc(sizeof(double));
-         memcpy((*current_entry)[idx],col,sizeof(double*));
+         printf("found double\n");
+         vptr_array[idx] = (double*) malloc(sizeof(double));
+         double elem = atof(col);
+         memcpy(vptr_array[idx],&elem,sizeof(double));
       }
+
       if ( strcmp(current_type,"s")==0 ) {
-         (*current_entry)[idx] = (char*) malloc(sizeof(char*));
-         memcpy((*current_entry)[idx],col,sizeof(char*));
+         printf("found string\n");
+         vptr_array[idx] = (char*) malloc(sizeof(char)*strlen(col));
+         memcpy(vptr_array[idx],col,strlen(col));
       }
+
       if ( strcmp(current_type,"lu")==0 ) {
-         (*current_entry)[idx] = (unsigned long*) malloc(sizeof(unsigned long));
-         memcpy((*current_entry)[idx],col,sizeof(unsigned long*));
+         printf("found unsigned long\n");
+         vptr_array[idx] = (unsigned long*) malloc(sizeof(unsigned long));
+         unsigned long elem = strtoul(col,NULL,10);
+         memcpy(vptr_array[idx],&elem,sizeof(unsigned long));
       }
    }
-  
-   printf("\n");
 
-   int* id = (int*)(*current_entry)[0];
-   printf("id is %d\n",id);
-   (*entries)[*id] = current_entry;
+   int *id = (int*) vptr_array[0];
+   (*entries)[*id] = vptr_array;
+   printf("size of map %d\n",entries->size());
+
+   free(mutable_line);
 }
 
 
+
 int ParaParser::parseFile(char* reads_filename) {
    size_t buf_size = 512;
    char* line = (char*) malloc(sizeof(char)*buf_size);
@@ -142,7 +212,7 @@ int ParaParser::parseFile(char* reads_filename) {
    off_t reads_filesize = reads_stat.st_size;
    //printf("Reads file is of size %lu bytes\n",(unsigned long) reads_filesize);
 
-   entries = new map<unsigned long,void***,KeyCmp>();
+   entries = new map<unsigned long,void**,KeyCmp>();
 
    // try to acquire file using mmap
    void *reads_area = mmap (NULL,reads_filesize,PROT_READ,MAP_PRIVATE,reads_fid,0);
@@ -177,7 +247,7 @@ int ParaParser::parseFile(char* reads_filename) {
       if (strcmp(current_line,"") == 0) 
          break;
 
-      create_entry_from_line(current_line,format_string,current_line);
+      create_entry_from_line(current_line,format_string);
       if (status != 0 )
          printf("Error while parsing line (status=%d).",status);
 
@@ -191,38 +261,69 @@ int ParaParser::parseFile(char* reads_filename) {
       readCtr += 1;
    }
 
-   // clean up
-   status = munmap(reads_area,reads_filesize);                                                                                                                                                                                 
+   printf("After parsing lines\n");
+
+   // unmap parsed file
+   status = munmap(reads_area,reads_filesize);
    if(status != 0)
       perror("munmap");
 
+   printf("After parsing lines\n");
+   // free unneeded variables
    free(current_line);
 
-   return 99;
+   printf("After parsing lines\n");
+   return readCtr;
 }
 
+
 /*
+ *
+ *
+ *
+ */
+
 PyObject* ParaParser::fetchEntry(int id) {
+   printf("begin of fetchEntry\n");
+   PyObject* line_dict = PyDict_New();
 
-   void*** current_entry = (*entries)[id];
 
-   PyObject* line_dict = PyDict_New();
+   printf("size of map %d\n",entries->size());
+
+   map<unsigned long,void**,KeyCmp>::iterator find_it = entries->find((unsigned long)id);
+   if( find_it == entries->end() )
+      return line_dict;
+
+   void** current_entry = (*entries)[id];
+
+   int* _id = (int*) current_entry[0];
+   printf("id is %d\n",*_id);
+
 
    int status;
 
+   PyObject* elem;
+
    for(int idx=0;idx<num_columns;idx++) {
       char* current_type = types_list[idx];
+      if ( strcmp(current_type,"d")==0 )
+         elem = PyInt_FromLong(*(int*)current_entry[idx]);
+
       if ( strcmp(current_type,"f")==0 )
-         status = PyDict_SetItem(line_dict, PyString_FromString(field_names[idx]), PyString_FromString((char*)(*current_entry)[idx]) );
-      
+         elem = PyFloat_FromDouble(*(double*)current_entry[idx]);
+
       if ( strcmp(current_type,"s")==0 )
-         status = PyDict_SetItem(line_dict, PyString_FromString(field_names[idx]), PyString_FromString((char*)(*current_entry)[idx]) );
+         elem = PyString_FromString((char*)current_entry[idx]);
 
       if ( strcmp(current_type,"lu")==0 || strcmp(current_type,"d")==0 )
-         status = PyDict_SetItem(line_dict, PyString_FromString(field_names[idx]), PyInt_FromLong(*(int*)(*current_entry)[idx]) );
-
+         elem = PyLong_FromUnsignedLong(*(int*)current_entry[idx]);
+         
+      printf("add item\n");
+      status = PyDict_SetItem(line_dict, PyString_FromString(field_names[idx]), elem);
    }
    
+   /*
+    *
    //size_t idx;
    //for(idx=0;idx<current_read->size;idx++) {
    //   status = PyList_SetItem( prb_list, idx, PyInt_FromLong(current_read->prb[idx]-50) );
@@ -230,6 +331,9 @@ PyObject* ParaParser::fetchEntry(int id) {
    //   status = PyList_SetItem( chastity_list, idx, PyInt_FromLong(current_read->chastity[idx]+10) );
    //}
 
+   */
+
+   printf("end of fetchEntry\n");
    return line_dict;
 }
-*/
+
index 92c21a2..2f464b3 100644 (file)
@@ -20,13 +20,13 @@ class ParaParser{
       int num_columns;
       char** types_list;
 
-      map<unsigned long,void***,KeyCmp> *entries;
+      map<unsigned long,void**,KeyCmp> *entries;
 
    public:
       ParaParser(const char* fmt, char** _fields, int num_entries);
       int parseFile(char* reads_filename);
-      void create_entry_from_line(const char* current_line, char* format_string, ...);
-      //PyObject* fetchEntry(int id);
+      void create_entry_from_line(const char* current_line, char* format_string);
+      PyObject* fetchEntry(int id);
 
       ~ParaParser(){}
 };
index 747e147..8b451b9 100644 (file)
@@ -7,10 +7,12 @@ from ParaParser import *
 def run(file):
    parser = ParaParser("%d%s%s%d%d",["field0","field1","field2","field3","field4"],5)
    parser.parseFile(file)
+   print "After parseFile call..."
    entry1_dict = parser.fetchEntry(101)
+   print "Fetching entry..."
    print entry1_dict
-   entry2_dict = parser.fetchEntry(102)
-   print entry2_dict
+   #entry2_dict = parser.fetchEntry(102)
+   #print entry2_dict
 
 if __name__ == '__main__':
    run('test.data')
index 88a2821..75f7d37 100644 (file)
@@ -1,2 +1,2 @@
-11111111       alpha    beta    1       2
-22222222       gamma    delta  99      100
+1111   alpha   beta    1       2
+2222   gamma    delta  99      100