+ cleaning up code base
[qpalma.git] / ParaParser / ParaParser.cpp
index 0c326ba..ffbe30e 100644 (file)
@@ -1,8 +1,4 @@
 #include "ParaParser.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/mman.h>
-#include <sys/stat.h>
 #include <string>
 using namespace std;
 
@@ -143,13 +139,22 @@ ParaParser::ParaParser(const char* fmt, char** _fields, int num_entries, storage
 }
 
 
+
+ParaParser::~ParaParser() {
+   // Destructor: release the region parseFile() mapped with mmap() (reads_area, reads_filesize bytes); unmapping is deferred to here because fetchEntry() still copies line text through the stored begin/end pointers, which presumably point into this mapping (verify against parseFile). A munmap failure is only reported via perror.
+   int status = munmap(reads_area,reads_filesize);
+   if(status != 0)
+      perror("munmap");
+}
+
+
 /*
  * 
  *
  *
  */
 
-void ParaParser::create_entry_from_line(const char* current_line, char* format_string) {
+void ParaParser::create_entry_from_line(const char* current_line, char* format_string, char* lineBeginPtr, char* lineEndPtr) {
    //printf("current line is %s",current_line);
    
    // create an array of void ptrs
@@ -176,15 +181,19 @@ void ParaParser::create_entry_from_line(const char* current_line, char* format_s
    map_key_t id = strtoul(current_entries[0],NULL,10);
 
    if ( current_mode == IN_VECTOR ) {
-      printf("size is %zd\n",v_entries->size());
+      //printf("size is %zd\n",v_entries->size());
       v_entries->push_back(current_entries);
-      printf("size is %zd\n",v_entries->size());
+      pair<char*,char*> p;
+      p.first = lineBeginPtr;
+      p.second = lineEndPtr;
+      v_ptr_entries->push_back(p);
+      //printf("size is %zd\n",v_entries->size());
    }
 
    if ( current_mode == IN_MAP ) {
-      printf("size is %zd\n",entries->size());
+      //printf("size is %zd\n",entries->size());
       (*entries)[id] = current_entries;
-      printf("size is %zd\n",entries->size());
+      //printf("size is %zd\n",entries->size());
    }
 }
 
@@ -225,24 +234,26 @@ int ParaParser::parseFile(char* reads_filename) {
       exit(EXIT_FAILURE);
    }
 
-   off_t reads_filesize = reads_stat.st_size;
-   //printf("Reads file is of size %lu bytes\n",(unsigned long) reads_filesize);
+   reads_filesize = reads_stat.st_size;
+   printf("Reads file is of size %lu bytes\n",(unsigned long) reads_filesize);
 
-   if ( current_mode == IN_VECTOR )
+   if ( current_mode == IN_VECTOR ) {
       v_entries = new VECTOR();
+      v_ptr_entries = new PTR_VECTOR();
+   }
 
    if ( current_mode == IN_MAP )
       entries = new MAP();
 
    // try to acquire file using mmap
-   void *reads_area = mmap (NULL,reads_filesize,PROT_READ,MAP_PRIVATE,reads_fid,0);
+   reads_area = mmap (NULL,reads_filesize,PROT_READ,MAP_PRIVATE,reads_fid,0);
    if (reads_area == MAP_FAILED) {
       perror("mmap");
       exit(EXIT_FAILURE);
    }
 
    close(reads_fid);
-   //printf("Successfully mapped %lu bytes of reads file into memory\n",(unsigned long)reads_filesize);
+   printf("Successfully mapped %lu bytes of reads file into memory\n",(unsigned long)reads_filesize);
                                       
    char* lineBeginPtr = (char*) reads_area;
    char* lineEndPtr = (char*) reads_area;     
@@ -261,11 +272,13 @@ int ParaParser::parseFile(char* reads_filename) {
    int readCtr = 0;
    int status = 0;
 
+   printf("Starting to parse file...\n");
+
    while(1) {
       if (strcmp(current_line,"") == 0) 
          break;
 
-      create_entry_from_line(current_line,format_string);
+      create_entry_from_line(current_line,format_string,lineBeginPtr,lineEndPtr);
 
       if (status != 0 )
          printf("Error while parsing line (status=%d).",status);
@@ -280,10 +293,8 @@ int ParaParser::parseFile(char* reads_filename) {
       readCtr += 1;
    }
 
-   // unmap parsed file
-   status = munmap(reads_area,reads_filesize);
-   if(status != 0)
-      perror("munmap");
+   printf("Successfully parsed file !\n");
+
 
    // free unneeded variables
    free(current_line);
@@ -300,28 +311,37 @@ int ParaParser::parseFile(char* reads_filename) {
 
 PyObject* ParaParser::fetchEntry(map_key_t id) {
    PyObject* line_dict = PyDict_New();
+
    //printf("begin of fetchEntry\n");
    //printf("size of map %d\n",entries->size());
    //printf("keys:\n");
    //MAP::iterator iter;   
    //for(iter = entries->begin(); iter != entries->end(); iter++)
    //   printf("%d\n", iter->first);
-   //
-   printf("query key is %lu\n",id);
+   //printf("query key is %lu\n",id);
 
    char** current_entry;
 
+   char* lineBeginPtr = 0;
+   char* lineEndPtr = 0;
+
    if ( current_mode == IN_VECTOR ) {
-      printf("IN_VECTOR mode\n");
+      //printf("IN_VECTOR mode\n");
       if (id >= v_entries->size())
          return line_dict;
 
-      printf("size %d\n",v_entries->size());
+      //printf("size %d\n",v_entries->size());
       current_entry = (*v_entries)[id];
+      pair<char*,char*> ptr_pair = (*v_ptr_entries)[id];
+      lineBeginPtr = ptr_pair.first;
+      lineEndPtr = ptr_pair.second;
    }
 
+   assert (lineBeginPtr != 0);
+   assert (lineEndPtr != 0);
+
    if ( current_mode == IN_MAP ) {
-      printf("IN_MAP mode\n");
+      //printf("IN_MAP mode\n");
       MAP::iterator find_it = entries->find(id);
       if( find_it == entries->end() )
          return line_dict;
@@ -349,7 +369,9 @@ PyObject* ParaParser::fetchEntry(map_key_t id) {
          elem = PyString_FromString(current);
 
       if ( strcmp(current_type,"lu")==0 )
-         elem = PyLong_FromString(current,NULL,10);
+         elem = PyString_FromString(current);
+         //elem = PyInt_FromString(current,NULL,10);
+         //elem = PyLong_FromString(current,NULL,10);
          
       if (elem == 0)
          printf("Error: type %s/ elem %s\n",current_type,current);
@@ -367,6 +389,17 @@ PyObject* ParaParser::fetchEntry(map_key_t id) {
    //}
    */
 
-   //printf("end of fetchEntry\n");
-   return line_dict;
+   PyObject *return_value = PyTuple_New(2);
+   PyTuple_SetItem(return_value,0,line_dict);
+
+   char* current_line = (char*) malloc(sizeof(char)*512);
+   memset(current_line,0,512);
+   unsigned long line_size = lineEndPtr - lineBeginPtr;
+   strncpy(current_line,lineBeginPtr,line_size-1);
+
+
+   PyObject *original_line = PyString_FromString(current_line);
+   PyTuple_SetItem(return_value,1,original_line);
+
+   return return_value;
 }