#include <string>
using namespace std;
+/*
+ *
+ *
+ */
+
+/*
+bool check_for_well_formed_format_string(const char* fmt) {
+ types_list = (char**) malloc(sizeof(char**)*num_columns);
+ char *pruned_ptr = pruned_format_string;
+ //printf("types list\n");
+ for(size_t f_idx=0;f_idx<num_columns;f_idx++) {
+ char *part = strtok (pruned_ptr, "%");
+ pruned_ptr = NULL;
+
+}
+*/
/**
* Split string and return pointers to its parts.
*
*/
-ParaParser::ParaParser(const char* fmt, char** _fields, int num_entries) {
+ParaParser::ParaParser(const char* fmt, char** _fields, int num_entries, storage_mode mode) {
+ // _fields   : num_entries column names, copied into field_names
+ // num_entries: number of columns per line, must be >= 1
+ // mode      : IN_VECTOR or IN_MAP, stored in current_mode
// check that we have more than zero entries and that the format string
// contains exactly num_entries format elements.
- assert(num_entries>0);
-
+ // The removed assert stopped the program on a bad count. Keep failing
+ // hard so the (size_t) cast below never sees zero or a negative value.
+ if ( num_entries < 1 ) {
+ printf("Error: You need at least one field !\n");
+ exit(EXIT_FAILURE);
+ }
+
num_columns = (size_t) num_entries;
// count how many entries are parsed in one line (number of %'s)
field_names = (char**) malloc(sizeof(char*)*num_columns);
for(size_t idx=0;idx<num_columns;idx++) {
field_names[idx] = (char*) malloc(sizeof(char)*buf_size);
- strncpy(field_names[idx],_fields[idx],strlen(_fields[idx]));
+ // bound the copy by the destination size (buf_size), not by the
+ // source length, and always terminate inside the buffer
+ strncpy(field_names[idx],_fields[idx],buf_size-1);
+ field_names[idx][buf_size-1] = '\0';
+ //printf("%s\n",field_names[idx]);
}
char* pruned_format_string = (char*) malloc(sizeof(char)*buf_size);
//printf("%s(%d) ",part,strlen(part));
}
//printf("\n");
+ //
+ current_mode = mode;
+ if(current_mode != IN_VECTOR && current_mode != IN_MAP) {
+ printf("Error: Wrong save mode!");
+ exit(EXIT_FAILURE);
+ }
}
}
free(mutable_line);
- int id = atoi(current_entries[0]);
- //printf("id is %d\n",id);
- //printf("size of map %d\n",entries->size());
- (*entries)[id] = current_entries;
- //printf("size of map %d\n",entries->size());
+ map_key_t id = strtoul(current_entries[0],NULL,10);
+
+ if ( current_mode == IN_VECTOR ) {
+ printf("size is %zd\n",v_entries->size());
+ v_entries->push_back(current_entries);
+ printf("size is %zd\n",v_entries->size());
+ }
+ if ( current_mode == IN_MAP ) {
+ printf("size is %zd\n",entries->size());
+ (*entries)[id] = current_entries;
+ printf("size is %zd\n",entries->size());
+ }
}
off_t reads_filesize = reads_stat.st_size;
//printf("Reads file is of size %lu bytes\n",(unsigned long) reads_filesize);
- entries = new MAP();
+ if ( current_mode == IN_VECTOR )
+ v_entries = new VECTOR();
+
+ if ( current_mode == IN_MAP )
+ entries = new MAP();
// try to acquire file using mmap
void *reads_area = mmap (NULL,reads_filesize,PROT_READ,MAP_PRIVATE,reads_fid,0);
*
*/
-PyObject* ParaParser::fetchEntry(int id) {
+PyObject* ParaParser::fetchEntry(map_key_t id) {
+ // Select the stored record for id and fill a Python dict from it.
+ // id is a position into v_entries in IN_VECTOR mode and a key of
+ // entries in IN_MAP mode. An id that is out of range or not present
+ // returns the empty dict created below.
PyObject* line_dict = PyDict_New();
//printf("begin of fetchEntry\n");
//printf("size of map %d\n",entries->size());
//MAP::iterator iter;
//for(iter = entries->begin(); iter != entries->end(); iter++)
// printf("%d\n", iter->first);
- //printf("query key is %d\n",id);
+ //
+ printf("query key is %lu\n",id);
+
+ // start from NULL so an unexpected mode cannot leave the pointer unset
+ char** current_entry = NULL;
- MAP::iterator find_it = entries->find(id);
- if( find_it == entries->end() )
- return line_dict;
+ if ( current_mode == IN_VECTOR ) {
+ printf("IN_VECTOR mode\n");
+ if (id >= v_entries->size())
+ return line_dict;
+
+ // size() returns size_t, which needs %zu rather than %d
+ printf("size %zu\n",v_entries->size());
+ current_entry = (*v_entries)[id];
+ }
+
+ if ( current_mode == IN_MAP ) {
+ printf("IN_MAP mode\n");
+ MAP::iterator find_it = entries->find(id);
+ if( find_it == entries->end() )
+ return line_dict;
+
+ // reuse the iterator instead of a second lookup through operator[]
+ current_entry = find_it->second;
+ }
+
+ // nothing was selected: hand back the empty dict instead of
+ // dereferencing a null record further down
+ if ( current_entry == NULL )
+ return line_dict;
- char** current_entry = (*entries)[id];
int status;
if (elem == 0)
printf("Error: type %s/ elem %s\n",current_type,current);
- //printf("add item\n");
free(current);
status = PyDict_SetItem(line_dict, PyString_FromString(field_names[idx]), elem);
#include <Python.h>
#include <map>
+#include <vector>
using namespace std;
struct KeyCmp {
}
};
+enum storage_mode { IN_VECTOR=0, IN_MAP=1 }; // selects the container for parsed records: VECTOR (v_entries) or MAP (entries)
+
+typedef unsigned long map_key_t; // key type of MAP and type of the id passed to fetchEntry
+
+typedef map<map_key_t,char**,KeyCmp> MAP; // key -> one record (char**), ordered by KeyCmp
+
+typedef vector<char**> VECTOR; // records appended with push_back and fetched by position
-typedef map<unsigned long,char**,KeyCmp> MAP;
class ParaParser{
size_t num_columns;
char** types_list;
+ storage_mode current_mode; // set from the constructor's mode argument; checked when storing and fetching
+
MAP *entries; // allocated only when current_mode == IN_MAP
+ VECTOR *v_entries; // allocated only when current_mode == IN_VECTOR
public:
- ParaParser(const char* fmt, char** _fields, int num_entries);
+ ParaParser(const char* fmt, char** _fields, int num_entries, storage_mode mode); // exits unless mode is IN_VECTOR or IN_MAP
int parseFile(char* reads_filename);
void create_entry_from_line(const char* current_line, char* format_string);
- PyObject* fetchEntry(int id);
+ PyObject* fetchEntry(map_key_t id); // id is a position in IN_VECTOR mode and a key in IN_MAP mode
~ParaParser(){} // NOTE(review): empty; nothing here releases field_names, entries or v_entries - confirm who owns them
};
from ParaParser import *
def run(file):
- parser = ParaParser("%d%s%s%d%d",["field0","field1","field2","field3","field4"],5)
+ parser = ParaParser("%d%s%s%d%d",["field0","field1","field2","field3","field4"],5,IN_MAP)  # map mode: records are keyed by the first column
parser.parseFile(file)
entry1_dict = parser.fetchEntry(1111)
print entry1_dict
del parser
- parser2 = ParaParser("%d%s",["field0","field1"],2)
+ parser2 = ParaParser("%d%s",["field0","field1"],2,IN_MAP)
file = 'test2.data'
parser2.parseFile(file)
entry1_dict = parser2.fetchEntry(1111)
print entry1_dict
- parser2 = ParaParser("%d%s",["field0","field1"],2)
- file = 'test2.data'
+ parser2 = ParaParser("%lu%s",["field0","field1"],2,IN_MAP)
+ file = 'test3.data'
parser2.parseFile(file)
- entry1_dict = parser2.fetchEntry(1111)
+ entry1_dict = parser2.fetchEntry(1000100000503)  # key does not fit in 32 bits; exercises the unsigned long key type
print entry1_dict
+ fields = ['id', 'chr', 'pos', 'strand', 'mismatches', 'length',\
+ 'offset', 'seq', 'prb', 'cal_prb', 'chastity']
+
+ parser2 = ParaParser("%lu%d%d%s%d%d%d%s%s%s%s",fields,len(fields),IN_VECTOR)  # vector mode: fetchEntry takes a 0-based position
+ file = '/fml/ag-raetsch/home/fabio/tmp/transcriptome_data/MAP3'  # NOTE(review): absolute path, presumably a local data set - confirm it exists where this runs
+ parser2.parseFile(file)
+ entry1_dict = parser2.fetchEntry(0)
+ print entry1_dict
+ entry1_dict = parser2.fetchEntry(1)
+ print entry1_dict
if __name__ == '__main__':
run('test.data')