+ added VECTOR and MAP modes to ParaParser in order to handle entries with the same
authorfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Mon, 2 Jun 2008 15:24:36 +0000 (15:24 +0000)
committerfabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Mon, 2 Jun 2008 15:24:36 +0000 (15:24 +0000)
ids

git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@9335 e1793c9e-67f9-0310-80fc-b846ff1f7b36

ParaParser/.ParaParser.cpp.swp
ParaParser/ParaParser.cpp
ParaParser/ParaParser.h
ParaParser/simple_example.py

index 43117d0..ee493c4 100644 (file)
Binary files a/ParaParser/.ParaParser.cpp.swp and b/ParaParser/.ParaParser.cpp.swp differ
index c7c3151..0c326ba 100644 (file)
@@ -6,6 +6,22 @@
 #include <string>
 using namespace std;
 
+/*
+ *
+ *
+ */
+
+/*
+bool check_for_well_formed_format_string(const char* fmt) {
+   types_list = (char**) malloc(sizeof(char**)*num_columns);
+   char *pruned_ptr = pruned_format_string;
+   //printf("types list\n");
+   for(size_t f_idx=0;f_idx<num_columns;f_idx++) {
+      char *part = strtok (pruned_ptr, "%");
+      pruned_ptr = NULL;
+
+}
+*/
 
 /**
  * Split string and return pointers to its parts.
@@ -65,11 +81,12 @@ unsigned split_args(char *args, char *** const argv_ptr, const char *delim)
  *
  */
 
-ParaParser::ParaParser(const char* fmt, char** _fields, int num_entries) {
+ParaParser::ParaParser(const char* fmt, char** _fields, int num_entries, storage_mode mode) {
    // check that we have more than zero entries and that the format string
    // contains exactly num_entries format elements.
-   assert(num_entries>0);
-
+   if ( num_entries < 1 )
+      printf("Error: You need at least one field !\n");
+      
    num_columns = (size_t) num_entries;
 
    // count how many entries are parsed in one line (number of %'s)
@@ -88,7 +105,9 @@ ParaParser::ParaParser(const char* fmt, char** _fields, int num_entries) {
    field_names = (char**) malloc(sizeof(char*)*num_columns);
    for(size_t idx=0;idx<num_columns;idx++) {
       field_names[idx] = (char*) malloc(sizeof(char)*buf_size);
-      strncpy(field_names[idx],_fields[idx],strlen(_fields[idx]));
+      strncpy(field_names[idx],_fields[idx],strlen(_fields[idx])+1);
+      field_names[idx][strlen(_fields[idx])] = '\0';
+      //printf("%s\n",field_names[idx]);
    }
 
    char* pruned_format_string = (char*) malloc(sizeof(char)*buf_size);
@@ -115,6 +134,12 @@ ParaParser::ParaParser(const char* fmt, char** _fields, int num_entries) {
       //printf("%s(%d) ",part,strlen(part));
    }
    //printf("\n");
+   //
+   current_mode = mode;
+   if(current_mode !=  IN_VECTOR && current_mode != IN_MAP) {
+      printf("Error: Wrong save mode!");
+      exit(EXIT_FAILURE);
+   }
 }
 
 
@@ -148,12 +173,19 @@ void ParaParser::create_entry_from_line(const char* current_line, char* format_s
    }
    free(mutable_line);
 
-   int id = atoi(current_entries[0]);
-   //printf("id is %d\n",id);
-   //printf("size of map %d\n",entries->size());
-   (*entries)[id] = current_entries;
-   //printf("size of map %d\n",entries->size());
+   map_key_t id = strtoul(current_entries[0],NULL,10);
+
+   if ( current_mode == IN_VECTOR ) {
+      printf("size is %zd\n",v_entries->size());
+      v_entries->push_back(current_entries);
+      printf("size is %zd\n",v_entries->size());
+   }
 
+   if ( current_mode == IN_MAP ) {
+      printf("size is %zd\n",entries->size());
+      (*entries)[id] = current_entries;
+      printf("size is %zd\n",entries->size());
+   }
 }
 
 
@@ -196,7 +228,11 @@ int ParaParser::parseFile(char* reads_filename) {
    off_t reads_filesize = reads_stat.st_size;
    //printf("Reads file is of size %lu bytes\n",(unsigned long) reads_filesize);
 
-   entries = new MAP();
+   if ( current_mode == IN_VECTOR )
+      v_entries = new VECTOR();
+
+   if ( current_mode == IN_MAP )
+      entries = new MAP();
 
    // try to acquire file using mmap
    void *reads_area = mmap (NULL,reads_filesize,PROT_READ,MAP_PRIVATE,reads_fid,0);
@@ -262,7 +298,7 @@ int ParaParser::parseFile(char* reads_filename) {
  *
  */
 
-PyObject* ParaParser::fetchEntry(int id) {
+PyObject* ParaParser::fetchEntry(map_key_t id) {
    PyObject* line_dict = PyDict_New();
    //printf("begin of fetchEntry\n");
    //printf("size of map %d\n",entries->size());
@@ -270,13 +306,29 @@ PyObject* ParaParser::fetchEntry(int id) {
    //MAP::iterator iter;   
    //for(iter = entries->begin(); iter != entries->end(); iter++)
    //   printf("%d\n", iter->first);
-   //printf("query key is %d\n",id);
+   //
+   printf("query key is %lu\n",id);
+
+   char** current_entry;
 
-   MAP::iterator find_it = entries->find(id);
-   if( find_it == entries->end() )
-      return line_dict;
+   if ( current_mode == IN_VECTOR ) {
+      printf("IN_VECTOR mode\n");
+      if (id >= v_entries->size())
+         return line_dict;
+
+      printf("size %d\n",v_entries->size());
+      current_entry = (*v_entries)[id];
+   }
+
+   if ( current_mode == IN_MAP ) {
+      printf("IN_MAP mode\n");
+      MAP::iterator find_it = entries->find(id);
+      if( find_it == entries->end() )
+         return line_dict;
+
+      current_entry = (*entries)[id];
+   }
 
-   char** current_entry = (*entries)[id];
 
    int status;
       
@@ -302,7 +354,6 @@ PyObject* ParaParser::fetchEntry(int id) {
       if (elem == 0)
          printf("Error: type %s/ elem %s\n",current_type,current);
       
-      //printf("add item\n");
       free(current);
 
       status = PyDict_SetItem(line_dict, PyString_FromString(field_names[idx]), elem);
index b7cf739..807ab57 100644 (file)
@@ -3,6 +3,7 @@
 
 #include <Python.h>
 #include <map>
+#include <vector>
 using namespace std;
 
 struct KeyCmp {
@@ -12,8 +13,14 @@ struct KeyCmp {
    }
 };
 
+enum storage_mode { IN_VECTOR=0, IN_MAP=1 };
+
+typedef unsigned long map_key_t;
+
+typedef map<map_key_t,char**,KeyCmp> MAP;
+
+typedef vector<char**> VECTOR;
 
-typedef map<unsigned long,char**,KeyCmp> MAP;
 
 class ParaParser{
 
@@ -23,13 +30,16 @@ class ParaParser{
       size_t num_columns;
       char** types_list;
 
+      storage_mode current_mode;
+
       MAP *entries;
+      VECTOR *v_entries;
 
    public:
-      ParaParser(const char* fmt, char** _fields, int num_entries);
+      ParaParser(const char* fmt, char** _fields, int num_entries, storage_mode mode);
       int parseFile(char* reads_filename);
       void create_entry_from_line(const char* current_line, char* format_string);
-      PyObject* fetchEntry(int id);
+      PyObject* fetchEntry(map_key_t id);
 
       ~ParaParser(){}
 };
index 9f4bd57..d267d03 100644 (file)
@@ -5,7 +5,7 @@ import sys
 from ParaParser import *
 
 def run(file):
-   parser = ParaParser("%d%s%s%d%d",["field0","field1","field2","field3","field4"],5)
+   parser = ParaParser("%d%s%s%d%d",["field0","field1","field2","field3","field4"],5,IN_MAP)
    parser.parseFile(file)
    entry1_dict = parser.fetchEntry(1111)
    print entry1_dict
@@ -16,18 +16,28 @@ def run(file):
 
    del parser
 
-   parser2 = ParaParser("%d%s",["field0","field1"],2)
+   parser2 = ParaParser("%d%s",["field0","field1"],2,IN_MAP)
    file = 'test2.data'
    parser2.parseFile(file)
    entry1_dict = parser2.fetchEntry(1111)
    print entry1_dict
 
-   parser2 = ParaParser("%d%s",["field0","field1"],2)
-   file = 'test2.data'
+   parser2 = ParaParser("%lu%s",["field0","field1"],2,IN_MAP)
+   file = 'test3.data'
    parser2.parseFile(file)
-   entry1_dict = parser2.fetchEntry(1111)
+   entry1_dict = parser2.fetchEntry(1000100000503)
    print entry1_dict
 
+   fields = ['id', 'chr', 'pos', 'strand', 'mismatches', 'length',\
+   'offset', 'seq', 'prb', 'cal_prb', 'chastity']
+
+   parser2 = ParaParser("%lu%d%d%s%d%d%d%s%s%s%s",fields,len(fields),IN_VECTOR)
+   file = '/fml/ag-raetsch/home/fabio/tmp/transcriptome_data/MAP3'
+   parser2.parseFile(file)
+   entry1_dict = parser2.fetchEntry(0)
+   print entry1_dict
+   entry1_dict = parser2.fetchEntry(1)
+   print entry1_dict
 
 if __name__ == '__main__':
    run('test.data')