c7c315142614ddaaee67b87c1b862e797bf081c0
1 #include "ParaParser.h"
11 * Split string and return pointers to its parts.
13 * \param args The string to be split.
14 * \param argv_ptr Pointer to the list of substrings.
15 * \param delim Delimiter.
17 * This function modifies \a args by replacing each occurrence of \a delim by
18 * zero. A \p NULL-terminated array of pointers to char* is allocated dynamically
19 * and these pointers are initialized to point to the broken-up substrings
20 * within \a args. A pointer to this array is returned via \a argv_ptr.
22 * \return The number of substrings found in \a args.
// Splits args on the characters of delim and returns the pieces via argv_ptr.
// NOTE(review): only part of the body is visible in this listing (the embedded
// line numbers jump from 25 to 31, 33, 38, ...), so the declarations of p, i,
// j and n, the loop headers and the return statement are not shown here.
25 unsigned split_args(char *args
, char *** const argv_ptr
, const char *delim
)
// Position p on the first character of args that is not in delim.
31 p
= args
+ strspn(args
, delim
);
// i = length of the leading run at p that contains no delimiter character.
33 i
= strcspn(p
, delim
);
// Step p over a run of delimiter characters.
38 p
+= strspn(p
, delim
);
// Allocate n + 1 pointer slots and publish the array through argv_ptr.
// NOTE(review): no check of the malloc() result is visible here. The extra
// slot presumably holds the terminating NULL promised by the header comment;
// the store itself is not visible in this listing.
40 *argv_ptr
= (char**) malloc((n
+ 1) * sizeof(char *));
// Start over from the first non-delimiter character of args.
43 p
= args
+ strspn(args
, delim
);
// j = length of the piece that starts at p.
46 j
= strcspn(p
, delim
);
// Move p to the end of that piece. NOTE(review): this calls strcspn() again
// with the same arguments as the assignment to j above; if p is unchanged in
// between (not visible), "p += j" would be equivalent.
49 p
+= strcspn(p
, delim
);
// Skip the delimiters that follow so p lands on the start of the next piece.
53 p
+= strspn(p
, delim
);
63 * The constructor needs the format string to be used with sscanf and the names
64 * of the respective fields for the dictionary.
// Constructor: stores the column count, copies the field names and derives a
// per-column type string from fmt.
// NOTE(review): this listing is fragmentary (embedded line numbers skip), so
// several statements and all closing braces of this definition are not shown.
68 ParaParser::ParaParser(const char* fmt
, char** _fields
, int num_entries
) {
69 // check that we have more than zero entries and that the format string
70 // contains exactly num_entries format elements.
71 assert(num_entries
>0);
// num_entries is known to be positive here, so the cast to size_t is safe.
73 num_columns
= (size_t) num_entries
;
75 // count how many entries are parsed in one line (number of %'s)
76 size_t format_num_entries
= 0;
// Walks fmt character by character; strlen(fmt) is re-evaluated on every
// iteration. The loop body (the actual counting) is not visible here.
77 for(size_t fidx
=0;fidx
<strlen(fmt
);fidx
++)
// Complain when the counted format entries do not match the number of names.
81 if (format_num_entries
!= num_columns
) {
82 printf("Error: For every entry in the format string you have to supply a name!\n");
86 // copy the field names to the member variable
// buf_size is the fixed capacity used for every name buffer and for the
// pruned format string below.
87 size_t buf_size
= 512;
88 field_names
= (char**) malloc(sizeof(char*)*num_columns
);
89 for(size_t idx
=0;idx
<num_columns
;idx
++) {
90 field_names
[idx
] = (char*) malloc(sizeof(char)*buf_size
);
// NOTE(review): the count passed to strncpy is strlen(source), so no
// terminating '\0' is copied and the copy is not limited to buf_size. The
// destination comes straight from malloc (uninitialised), so the name is only
// a valid C string if a line not shown here terminates it, and a name of 512
// bytes or more overflows the buffer.
91 strncpy(field_names
[idx
],_fields
[idx
],strlen(_fields
[idx
]));
// Scratch copy of fmt that is rewritten below; capacity is buf_size bytes.
94 char* pruned_format_string
= (char*) malloc(sizeof(char)*buf_size
);
95 size_t pruned_size
= 0;
// NOTE(review): no bound check against buf_size is visible in this copy loop;
// the condition that decides which characters of fmt are kept is not shown.
96 for(size_t idx
=0;idx
<strlen(fmt
);idx
++) {
97 pruned_format_string
[pruned_size
] = fmt
[idx
];
// Close the pruned string with a trailing '%' followed by the terminator.
101 pruned_format_string
[pruned_size
] = '%';
102 pruned_format_string
[++pruned_size
] = '\0';
103 //printf("%s\n",pruned_format_string);
// One type string per column. NOTE(review): the element size is written as
// sizeof(char**) although the elements are char*; both are pointer-sized, so
// the allocation is large enough, but sizeof(char*) would state the intent.
105 types_list
= (char**) malloc(sizeof(char**)*num_columns
);
106 char *pruned_ptr
= pruned_format_string
;
107 //printf("types list\n");
108 for(size_t f_idx
=0;f_idx
<num_columns
;f_idx
++) {
// Take the next '%'-separated piece. NOTE(review): strtok only continues a
// scan when it is passed NULL; whether pruned_ptr is reset to NULL after the
// first call happens on lines not visible here - confirm. A NULL return from
// strtok is also not checked in the visible lines.
109 char *part
= strtok (pruned_ptr
, "%");
// NOTE(review): sizeof(char*)*strlen(part)+1 over-allocates; the buffer holds
// chars, so strlen(part)+1 bytes would be enough.
112 types_list
[f_idx
] = (char*) malloc(sizeof(char*)*strlen(part
)+1);
// Terminator is written first, then exactly strlen(part) bytes are copied in
// front of it, so the stored type string is properly terminated.
113 types_list
[f_idx
][strlen(part
)] = '\0';
114 strncpy(types_list
[f_idx
],part
,strlen(part
));
115 //printf("%s(%d) ",part,strlen(part));
// Splits one tab-separated line into its columns, copies each column into a
// freshly allocated string and stores the resulting array in *entries under
// the integer value of the first column.
// NOTE(review): this listing is fragmentary (embedded line numbers skip); the
// declaration of line_parts and the closing braces are not shown, and
// format_string is not used in any visible line.
127 void ParaParser::create_entry_from_line(const char* current_line
, char* format_string
) {
128 //printf("current line is %s",current_line);
130 // create an array of char pointers, one slot per column
131 char **current_entries
= (char**) malloc(sizeof(char*)*num_columns
);
// Writable copy of the line, needed because split_args receives a non-const
// char*. NOTE(review): the buffer is exactly strlen(current_line) bytes, the
// strncpy below copies no terminator, and no other terminating write is
// visible, so split_args would scan past the end of the allocation unless a
// line not shown here fixes this - confirm.
133 char* mutable_line
= (char*) malloc(sizeof(char)*strlen(current_line
));
134 strncpy(mutable_line
,current_line
,strlen(current_line
));
// Overwrite the last character with a tab. NOTE(review): for an empty
// current_line the index strlen(...)-1 wraps around.
135 mutable_line
[strlen(current_line
)-1] = '\t';
// Break the copy into tab-separated parts; num_parts is the count reported by
// split_args.
138 size_t num_parts
= split_args(mutable_line
,&line_parts
,"\t");
// NOTE(review): the loop bound is num_columns, not num_parts. Whether
// num_parts is validated first happens on a line not visible here; without
// that, a short line makes line_parts[idx] invalid.
140 for(size_t idx
=0; idx
<num_columns
;idx
++) {
141 char* col
= line_parts
[idx
];
143 //printf("elem:%s\n",col);
// Duplicate the column text: allocate strlen+1 bytes, copy, then terminate.
144 current_entries
[idx
] = (char*) malloc(sizeof(char)*strlen(col
)+1);
145 //current_entries2[idx] = string(col);
146 strncpy(current_entries
[idx
],col
,strlen(col
));
147 current_entries
[idx
][strlen(col
)] = '\0';
// The first column is used as the key; atoi gives 0 for non-numeric text.
151 int id
= atoi(current_entries
[0]);
152 //printf("id is %d\n",id);
153 //printf("size of map %d\n",entries->size());
// Store the column array under id. NOTE(review): an array already stored
// under the same id is not released in the visible lines.
154 (*entries
)[id
] = current_entries
;
155 //printf("size of map %d\n",entries->size());
// Reads reads_filename: first via getline, then by mapping the file with mmap
// and handing each newline-delimited line to create_entry_from_line.
// NOTE(review): this listing is fragmentary (embedded line numbers skip); the
// loop around the per-line processing, the declaration of status, the error
// branches and the return statements are not shown.
166 int ParaParser::parseFile(char* reads_filename
) {
// Initial getline buffer; getline may reallocate it and update buf_size.
167 size_t buf_size
= 512;
168 char* line
= (char*) malloc(sizeof(char)*buf_size
);
170 //printf("open %s\n",reads_filename);
171 FILE *input_fs
= fopen(reads_filename
,"r");
172 if (input_fs
== NULL
)
// First pass over the stream, one getline call per line; the loop body is not
// visible (the commented-out printf below refers to a line counter).
177 while (getline (&line
, &buf_size
, input_fs
) >= 0)
182 //printf("file has %d lines\n",line_ctr);
// NOTE(review): input_fs was already tested after fopen and has been passed to
// getline above, so this second NULL test comes after the stream was used.
184 if(input_fs
== NULL
) {
185 printf("Error: Could not open file: %s",reads_filename
);
// Get the descriptor behind the stream so the file can be stat'ed and mapped.
189 int reads_fid
= fileno(input_fs
);
190 struct stat reads_stat
;
191 if ( fstat(reads_fid
,&reads_stat
) == -1) {
196 off_t reads_filesize
= reads_stat
.st_size
;
197 //printf("Reads file is of size %lu bytes\n",(unsigned long) reads_filesize);
201 // try to acquire file using mmap
// Read-only private mapping of the whole file, starting at offset 0.
202 void *reads_area
= mmap (NULL
,reads_filesize
,PROT_READ
,MAP_PRIVATE
,reads_fid
,0);
203 if (reads_area
== MAP_FAILED
) {
209 //printf("Successfully mapped %lu bytes of reads file into memory\n",(unsigned long)reads_filesize);
// Both cursors start at the beginning of the mapping; end_of_mapped_area is
// one past its last byte.
211 char* lineBeginPtr
= (char*) reads_area
;
212 char* lineEndPtr
= (char*) reads_area
;
213 char* end_of_mapped_area
= ((char*) reads_area
) + reads_filesize
;
// Advance lineEndPtr to the first newline. NOTE(review): *lineEndPtr is read
// before the pointer is compared with end_of_mapped_area, so when no newline
// is found the byte at end_of_mapped_area itself is dereferenced.
215 while (*lineEndPtr
!= '\n' && lineEndPtr
!= end_of_mapped_area
) lineEndPtr
++;
// Zero-filled 512-byte buffer that receives one line at a time.
218 char* current_line
= (char*) malloc(sizeof(char)*512);
219 memset(current_line
,0,512);
// NOTE(review): line_size is not limited to the 512-byte capacity, so a line
// of 512 characters or more overflows current_line here.
221 unsigned long line_size
= lineEndPtr
- lineBeginPtr
;
222 strncpy(current_line
,lineBeginPtr
,line_size
);
223 current_line
[line_size
] = '\0';
// Empty line check; the statement it guards is not visible.
229 if (strcmp(current_line
,"") == 0)
232 create_entry_from_line(current_line
,format_string
);
235 printf("Error while parsing line (status=%d).",status
);
// Continue from where the previous line ended and look for the next newline.
// NOTE(review): same dereference-before-bounds-test order as above; whether
// lineEndPtr is stepped past the '\n' first happens on a line not shown.
237 lineBeginPtr
= lineEndPtr
;
238 while (*(char*)lineEndPtr
!= '\n' && lineEndPtr
!= end_of_mapped_area
) lineEndPtr
++;
// Copy the next line into the same buffer; strncpy returns its destination,
// so current_line keeps its value. NOTE(review): again not limited to 512.
241 current_line
= strncpy(current_line
,lineBeginPtr
,lineEndPtr
-lineBeginPtr
);
242 current_line
[lineEndPtr
-lineBeginPtr
] = '\0';
// Release the mapping; the result is kept in status.
248 status
= munmap(reads_area
,reads_filesize
);
252 // free unneeded variables
// Builds a Python dict for the entry stored under id: each column is converted
// according to its type string and inserted under the matching field name.
// NOTE(review): this listing is fragmentary (embedded line numbers skip); the
// declarations of elem and status, the not-found branch, the else/brace
// structure of the type dispatch and the return statement are not shown.
265 PyObject
* ParaParser::fetchEntry(int id
) {
266 PyObject
* line_dict
= PyDict_New();
267 //printf("begin of fetchEntry\n");
268 //printf("size of map %d\n",entries->size());
270 //MAP::iterator iter;
271 //for(iter = entries->begin(); iter != entries->end(); iter++)
272 // printf("%d\n", iter->first);
273 //printf("query key is %d\n",id);
// Look the id up first so a missing key can be detected.
275 MAP::iterator find_it
= entries
->find(id
);
276 if( find_it
== entries
->end() )
// NOTE(review): this indexes the map a second time instead of reusing find_it.
279 char** current_entry
= (*entries
)[id
];
// Convert every column of the entry.
283 for(size_t idx
=0;idx
<num_columns
;idx
++) {
284 char* current_type
= types_list
[idx
];
285 char* current
= current_entry
[idx
];
287 // init elem to make compiler happy
// Type string "d": parse the text as a base-10 Python int.
290 if ( strcmp(current_type
,"d")==0 )
291 elem
= PyInt_FromString(current
,NULL
,10);
// Type string "f": parse the text as a Python float. NOTE(review): the
// PyString object created inline is not kept in a variable, so it cannot be
// released afterwards - presumably a reference leak; confirm against the
// Python C API.
293 if ( strcmp(current_type
,"f")==0 )
294 elem
= PyFloat_FromString(PyString_FromString(current
),NULL
);
// Type string "s": keep the text as a Python string.
296 if ( strcmp(current_type
,"s")==0 )
297 elem
= PyString_FromString(current
);
// Type string "lu": parse the text as a base-10 Python long.
299 if ( strcmp(current_type
,"lu")==0 )
300 elem
= PyLong_FromString(current
,NULL
,10);
303 printf("Error: type %s/ elem %s\n",current_type
,current
);
305 //printf("add item\n");
// Insert the converted value under the column's field name. NOTE(review): the
// key object is created inline and neither it nor elem is released in the
// visible lines; PyDict_SetItem presumably does not take over those
// references - confirm against the Python C API.
308 status
= PyDict_SetItem(line_dict
, PyString_FromString(field_names
[idx
]), elem
);
312 //for(size_t idx=0;idx<current_read->size;idx++) {
313 // status = PyList_SetItem( prb_list, idx, PyInt_FromLong(current_read->prb[idx]-50) );
314 // status = PyList_SetItem( cal_prb_list, idx, PyInt_FromLong(current_read->cal_prb[idx]-64) );
315 // status = PyList_SetItem( chastity_list, idx, PyInt_FromLong(current_read->chastity[idx]+10) );
319 //printf("end of fetchEntry\n");