projects
/
qpalma.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
a7bd04f
)
+ added VECTOR and MAP mode to ParaParser in order to handle entries with same
author
fabio
<fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Mon, 2 Jun 2008 15:24:36 +0000
(15:24 +0000)
committer
fabio
<fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Mon, 2 Jun 2008 15:24:36 +0000
(15:24 +0000)
ids
git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@9335 e1793c9e-67f9-0310-80fc-b846ff1f7b36
ParaParser/.ParaParser.cpp.swp
patch
|
blob
|
history
ParaParser/ParaParser.cpp
patch
|
blob
|
history
ParaParser/ParaParser.h
patch
|
blob
|
history
ParaParser/simple_example.py
patch
|
blob
|
history
diff --git
a/ParaParser/.ParaParser.cpp.swp
b/ParaParser/.ParaParser.cpp.swp
index
43117d0
..
ee493c4
100644
(file)
Binary files a/ParaParser/.ParaParser.cpp.swp and b/ParaParser/.ParaParser.cpp.swp differ
diff --git
a/ParaParser/ParaParser.cpp
b/ParaParser/ParaParser.cpp
index
c7c3151
..
0c326ba
100644
(file)
--- a/
ParaParser/ParaParser.cpp
+++ b/
ParaParser/ParaParser.cpp
@@
-6,6
+6,22
@@
#include <string>
using namespace std;
#include <string>
using namespace std;
+/*
+ *
+ *
+ */
+
+/*
+bool check_for_well_formed_format_string(const char* fmt) {
+ types_list = (char**) malloc(sizeof(char**)*num_columns);
+ char *pruned_ptr = pruned_format_string;
+ //printf("types list\n");
+ for(size_t f_idx=0;f_idx<num_columns;f_idx++) {
+ char *part = strtok (pruned_ptr, "%");
+ pruned_ptr = NULL;
+
+}
+*/
/**
* Split string and return pointers to its parts.
/**
* Split string and return pointers to its parts.
@@
-65,11
+81,12
@@
unsigned split_args(char *args, char *** const argv_ptr, const char *delim)
*
*/
*
*/
-ParaParser::ParaParser(const char* fmt, char** _fields, int num_entries) {
+ParaParser::ParaParser(const char* fmt, char** _fields, int num_entries, storage_mode mode) {
// check that we have more than zero entries and that the format string
// contains exactly num_entries format elements.
// check that we have more than zero entries and that the format string
// contains exactly num_entries format elements.
- assert(num_entries>0);
-
+ if ( num_entries < 1 )
+ printf("Error: You need at least one field !\n");
+
num_columns = (size_t) num_entries;
// count how many entries are parsed in one line (number of %'s)
num_columns = (size_t) num_entries;
// count how many entries are parsed in one line (number of %'s)
@@
-88,7
+105,9
@@
ParaParser::ParaParser(const char* fmt, char** _fields, int num_entries) {
field_names = (char**) malloc(sizeof(char*)*num_columns);
for(size_t idx=0;idx<num_columns;idx++) {
field_names[idx] = (char*) malloc(sizeof(char)*buf_size);
field_names = (char**) malloc(sizeof(char*)*num_columns);
for(size_t idx=0;idx<num_columns;idx++) {
field_names[idx] = (char*) malloc(sizeof(char)*buf_size);
- strncpy(field_names[idx],_fields[idx],strlen(_fields[idx]));
+ strncpy(field_names[idx],_fields[idx],strlen(_fields[idx])+1);
+ field_names[idx][strlen(_fields[idx])] = '\0';
+ //printf("%s\n",field_names[idx]);
}
char* pruned_format_string = (char*) malloc(sizeof(char)*buf_size);
}
char* pruned_format_string = (char*) malloc(sizeof(char)*buf_size);
@@
-115,6
+134,12
@@
ParaParser::ParaParser(const char* fmt, char** _fields, int num_entries) {
//printf("%s(%d) ",part,strlen(part));
}
//printf("\n");
//printf("%s(%d) ",part,strlen(part));
}
//printf("\n");
+ //
+ current_mode = mode;
+ if(current_mode != IN_VECTOR && current_mode != IN_MAP) {
+ printf("Error: Wrong save mode!");
+ exit(EXIT_FAILURE);
+ }
}
}
@@
-148,12
+173,19
@@
void ParaParser::create_entry_from_line(const char* current_line, char* format_s
}
free(mutable_line);
}
free(mutable_line);
- int id = atoi(current_entries[0]);
- //printf("id is %d\n",id);
- //printf("size of map %d\n",entries->size());
- (*entries)[id] = current_entries;
- //printf("size of map %d\n",entries->size());
+ map_key_t id = strtoul(current_entries[0],NULL,10);
+
+ if ( current_mode == IN_VECTOR ) {
+ printf("size is %zd\n",v_entries->size());
+ v_entries->push_back(current_entries);
+ printf("size is %zd\n",v_entries->size());
+ }
+ if ( current_mode == IN_MAP ) {
+ printf("size is %zd\n",entries->size());
+ (*entries)[id] = current_entries;
+ printf("size is %zd\n",entries->size());
+ }
}
}
@@
-196,7
+228,11
@@
int ParaParser::parseFile(char* reads_filename) {
off_t reads_filesize = reads_stat.st_size;
//printf("Reads file is of size %lu bytes\n",(unsigned long) reads_filesize);
off_t reads_filesize = reads_stat.st_size;
//printf("Reads file is of size %lu bytes\n",(unsigned long) reads_filesize);
- entries = new MAP();
+ if ( current_mode == IN_VECTOR )
+ v_entries = new VECTOR();
+
+ if ( current_mode == IN_MAP )
+ entries = new MAP();
// try to acquire file using mmap
void *reads_area = mmap (NULL,reads_filesize,PROT_READ,MAP_PRIVATE,reads_fid,0);
// try to acquire file using mmap
void *reads_area = mmap (NULL,reads_filesize,PROT_READ,MAP_PRIVATE,reads_fid,0);
@@
-262,7
+298,7
@@
int ParaParser::parseFile(char* reads_filename) {
*
*/
*
*/
-PyObject* ParaParser::fetchEntry(int id) {
+PyObject* ParaParser::fetchEntry(map_key_t id) {
PyObject* line_dict = PyDict_New();
//printf("begin of fetchEntry\n");
//printf("size of map %d\n",entries->size());
PyObject* line_dict = PyDict_New();
//printf("begin of fetchEntry\n");
//printf("size of map %d\n",entries->size());
@@
-270,13
+306,29
@@
PyObject* ParaParser::fetchEntry(int id) {
//MAP::iterator iter;
//for(iter = entries->begin(); iter != entries->end(); iter++)
// printf("%d\n", iter->first);
//MAP::iterator iter;
//for(iter = entries->begin(); iter != entries->end(); iter++)
// printf("%d\n", iter->first);
- //printf("query key is %d\n",id);
+ //
+ printf("query key is %lu\n",id);
+
+ char** current_entry;
- MAP::iterator find_it = entries->find(id);
- if( find_it == entries->end() )
- return line_dict;
+ if ( current_mode == IN_VECTOR ) {
+ printf("IN_VECTOR mode\n");
+ if (id >= v_entries->size())
+ return line_dict;
+
+ printf("size %d\n",v_entries->size());
+ current_entry = (*v_entries)[id];
+ }
+
+ if ( current_mode == IN_MAP ) {
+ printf("IN_MAP mode\n");
+ MAP::iterator find_it = entries->find(id);
+ if( find_it == entries->end() )
+ return line_dict;
+
+ current_entry = (*entries)[id];
+ }
- char** current_entry = (*entries)[id];
int status;
int status;
@@
-302,7
+354,6
@@
PyObject* ParaParser::fetchEntry(int id) {
if (elem == 0)
printf("Error: type %s/ elem %s\n",current_type,current);
if (elem == 0)
printf("Error: type %s/ elem %s\n",current_type,current);
- //printf("add item\n");
free(current);
status = PyDict_SetItem(line_dict, PyString_FromString(field_names[idx]), elem);
free(current);
status = PyDict_SetItem(line_dict, PyString_FromString(field_names[idx]), elem);
diff --git
a/ParaParser/ParaParser.h
b/ParaParser/ParaParser.h
index
b7cf739
..
807ab57
100644
(file)
--- a/
ParaParser/ParaParser.h
+++ b/
ParaParser/ParaParser.h
@@
-3,6
+3,7
@@
#include <Python.h>
#include <map>
#include <Python.h>
#include <map>
+#include <vector>
using namespace std;
struct KeyCmp {
using namespace std;
struct KeyCmp {
@@
-12,8
+13,14
@@
struct KeyCmp {
}
};
}
};
+enum storage_mode { IN_VECTOR=0, IN_MAP=1 };
+
+typedef unsigned long map_key_t;
+
+typedef map<map_key_t,char**,KeyCmp> MAP;
+
+typedef vector<char**> VECTOR;
-typedef map<unsigned long,char**,KeyCmp> MAP;
class ParaParser{
class ParaParser{
@@
-23,13
+30,16
@@
class ParaParser{
size_t num_columns;
char** types_list;
size_t num_columns;
char** types_list;
+ storage_mode current_mode;
+
MAP *entries;
MAP *entries;
+ VECTOR *v_entries;
public:
public:
- ParaParser(const char* fmt, char** _fields, int num_entries);
+ ParaParser(const char* fmt, char** _fields, int num_entries, storage_mode mode);
int parseFile(char* reads_filename);
void create_entry_from_line(const char* current_line, char* format_string);
int parseFile(char* reads_filename);
void create_entry_from_line(const char* current_line, char* format_string);
- PyObject* fetchEntry(int id);
+ PyObject* fetchEntry(map_key_t id);
~ParaParser(){}
};
~ParaParser(){}
};
diff --git
a/ParaParser/simple_example.py
b/ParaParser/simple_example.py
index
9f4bd57
..
d267d03
100644
(file)
--- a/
ParaParser/simple_example.py
+++ b/
ParaParser/simple_example.py
@@
-5,7
+5,7
@@
import sys
from ParaParser import *
def run(file):
from ParaParser import *
def run(file):
- parser = ParaParser("%d%s%s%d%d",["field0","field1","field2","field3","field4"],5)
+ parser = ParaParser("%d%s%s%d%d",["field0","field1","field2","field3","field4"],5,IN_MAP)
parser.parseFile(file)
entry1_dict = parser.fetchEntry(1111)
print entry1_dict
parser.parseFile(file)
entry1_dict = parser.fetchEntry(1111)
print entry1_dict
@@
-16,18
+16,28
@@
def run(file):
del parser
del parser
- parser2 = ParaParser("%d%s",["field0","field1"],2)
+ parser2 = ParaParser("%d%s",["field0","field1"],2,IN_MAP)
file = 'test2.data'
parser2.parseFile(file)
entry1_dict = parser2.fetchEntry(1111)
print entry1_dict
file = 'test2.data'
parser2.parseFile(file)
entry1_dict = parser2.fetchEntry(1111)
print entry1_dict
- parser2 = ParaParser("%d%s",["field0","field1"],2)
- file = 'test2.data'
+ parser2 = ParaParser("%lu%s",["field0","field1"],2,IN_MAP)
+ file = 'test3.data'
parser2.parseFile(file)
parser2.parseFile(file)
- entry1_dict = parser2.fetchEntry(1111)
+ entry1_dict = parser2.fetchEntry(1000100000503)
print entry1_dict
print entry1_dict
+ fields = ['id', 'chr', 'pos', 'strand', 'mismatches', 'length',\
+ 'offset', 'seq', 'prb', 'cal_prb', 'chastity']
+
+ parser2 = ParaParser("%lu%d%d%s%d%d%d%s%s%s%s",fields,len(fields),IN_VECTOR)
+ file = '/fml/ag-raetsch/home/fabio/tmp/transcriptome_data/MAP3'
+ parser2.parseFile(file)
+ entry1_dict = parser2.fetchEntry(0)
+ print entry1_dict
+ entry1_dict = parser2.fetchEntry(1)
+ print entry1_dict
if __name__ == '__main__':
run('test.data')
if __name__ == '__main__':
run('test.data')