ARB
aisc_parser.c
Go to the documentation of this file.
1 // Coded by Ralf Westram (coder@reallysoft.de) in March 2011 //
2 // Institute of Microbiology (Technical University Munich) //
3 // http://www.arb-home.de/ //
4 
5 #include "aisc_parser.h"
6 
7 const char *Parser::currentLocation(const char *io) {
8  const int LOCBUFFERSIZE = 1024;
9  static char loc_buf[LOCBUFFERSIZE+1];
10  int printed;
11 
12  if (last_line_start) {
13  int column = io - last_line_start + 1;
14 
15  aisc_assert(column >= 0 && column<10000);
16  printed = sprintf(loc_buf, "%s:%i:%i", loc.get_path(), loc.get_linenr(), column);
17  }
18  else {
19  printed = sprintf(loc_buf, "%s:%i", loc.get_path(), loc.get_linenr());
20  }
21 
22  if (printed>LOCBUFFERSIZE) {
23  fprintf(stderr, "AISC: Internal buffer overflow detected -- terminating [loc_buf]\n");
24  error_flag = 1;
25  }
26 
27  return loc_buf;
28 }
29 
30 void Parser::p_err(const char *io, const char *error) {
31  fprintf(stderr, "%s: Error: %s\n", currentLocation(io), error);
32  error_flag = 1;
33 }
34 
// HAVE_VSNPRINTF is switched on automatically for LINUX builds.
#ifdef LINUX
# define HAVE_VSNPRINTF
#endif

// PRINT2BUFFER formats the va_list 'parg' according to 'templat' into 'buffer'
// and evaluates to the printf-style result (number of characters).
//
// The macro expands to a plain expression (no trailing ';'), so it can be used
// inside larger expressions as well as in 'x = PRINT2BUFFER(...);'.
//
// WARNING: the vsprintf() fallback ignores 'bufsize' and can overrun 'buffer'.
#ifdef HAVE_VSNPRINTF
# define PRINT2BUFFER(buffer, bufsize, templat, parg) vsnprintf(buffer, bufsize, templat, parg)
#else
# define PRINT2BUFFER(buffer, bufsize, templat, parg) vsprintf(buffer, templat, parg)
#endif
43 
44 #define ERRBUFFERSIZE 1024
45 void Parser::p_errf(const char *io, const char *formatString, ...) {
46  static char buf[ERRBUFFERSIZE+1];
47  int printed;
48  va_list varArgs;
49 
50  va_start(varArgs, formatString);
51  printed = PRINT2BUFFER(buf, ERRBUFFERSIZE, formatString, varArgs);
52  va_end(varArgs);
53 
54  if (printed>ERRBUFFERSIZE) {
55  fprintf(stderr, "AISC: Internal buffer overflow detected -- terminating [err]\n");
56  error_flag = 1;
57  }
58  p_err(io, buf);
59 }
60 
61 void Parser::copyTillQuotesTo(const char*& in, char*& out) {
62  // closing quotes are END_STR1 plus END_STR2
63  bool quotes_closed = false;
64  while (!quotes_closed) {
65  while ((lastchar != EOSTR) && (lastchar != END_STR1)) {
66  *(out++) = lastchar;
67  get_byte(in);
68  }
69 
70  if (lastchar == END_STR1) {
71  get_byte(in);
72  if (lastchar == END_STR2) {
73  get_byte(in);
74  quotes_closed = true;
75  }
76  else {
77  *(out++) = END_STR1;
78  }
79  }
80  }
81 }
82 
83 char *Parser::readWord(const char *& in) {
84  char buf[1024];
85  char *cp = buf;
86 
87  if (lastchar == BEG_STR1) {
88  get_byte(in);
89  if (lastchar == BEG_STR2) {
90  get_byte(in);
91  copyTillQuotesTo(in, cp);
92  }
93  else {
94  *(cp++) = BEG_STR1;
95  copyWordTo(in, cp);
96  }
97  }
98  else copyWordTo(in, cp);
99 
100  if (lastchar != EOSTR) skip_over_spaces_and_comments(in);
101 
102  char *result = NULp;
103  if (lastchar == EOSTR) {
104  p_err_exp_but_saw_EOF(in, "terminator after string");
105  }
106  else if (cp != buf) { // do not return empty string, return NULp instead
107  *cp = 0;
108  result = strdup(buf);
109  }
110 
111  return result;
112 }
113 
114 char *Parser::SETSOURCE(const char *& in, enum TOKEN& foundTokenType) {
115  char *result = NULp;
116  const char *space = in+9;
117 
118  if (*space != ' ') p_err(in, "space expected after '@SETSOURCE' (injected code)");
119  else {
120  in = space;
121  get_byte(in);
122  skip_over_spaces(in);
123 
124  const char *file = in-1;
125  const char *comma = strchr(file, ',');
126 
127  if (!comma) p_err(in, "comma expected after '@SETSOURCE filename' (injected code)");
128  else {
129  const char *end = strchr(comma, '@');
130 
131  if (!end) p_err(in, "'@' expected after '@SETSOURCE filename,line' (injected code)");
132  else {
133  char *filename = copy_string_part(file, comma-1);
134  set_source(filename, atoi(comma+1));
135  free(filename);
136 
137  in = end+1;
138  get_byte(in);
139 
140  result = parse_token(in, foundTokenType);
141  }
142  }
143  }
144  return result;
145 }
146 
147 char *Parser::parse_token(const char *& in, enum TOKEN& foundTokenType) {
148  skip_over_spaces_and_comments_multiple_lines(in);
149 
150  char *result = NULp;
151  foundTokenType = TOK_INVALID;
152 
153  switch (lastchar) {
154  case EOSTR: foundTokenType = TOK_EOS; break;
155  case '{': foundTokenType = TOK_BRACE_OPEN; break;
156  case '}': foundTokenType = TOK_BRACE_CLOSE; break;
157  case ',': foundTokenType = TOK_COMMA; break;
158  case ';': foundTokenType = TOK_SEMI; break;
159 
160  case '@':
161  if (strncmp(in, "SETSOURCE", 9) == 0) {
162  result = SETSOURCE(in, foundTokenType);
163  }
164  else {
165  get_byte(in);
166  skip_over_spaces(in);
167  if (lastchar == EOSTR) {
168  p_err_exp_but_saw_EOF(in, "ID behind '@'");
169  }
170  else {
171  result = readWord(in);
172  foundTokenType = TOK_AT_WORD;
173  }
174  }
175  break;
176 
177  default:
178  result = readWord(in);
179  foundTokenType = TOK_WORD;
180  break;
181  }
182 
183  return result;
184 }
185 
186 class Header : virtual Noncopyable {
187  char *key;
188  Header *next;
189  public:
190  Header(const char *key_) { key = strdup(key_); next = NULp; }
191  ~Header() { free(key); delete next; }
192 
193  void set_next(Header *header) { aisc_assert(!next); next = header; }
194 
195  const char *get_key() const { return key; }
196  const Header *next_header() const { return next; }
197 };
198 
// List of Header nodes (tracked via head and tail pointer) together with a
// copy of the location string passed to set_first_header().
// NOTE(review): this listing appears to have dropped lines (the embedded
// numbering skips 206 and 216); presumably the constructor header and the
// destructor -- confirm against the original file.
199 class HeaderList : virtual Noncopyable {
200  Header *head;
201  Header *tail;
202 
203  char *loc_defined_at;
204 
205  public:
207  head = tail = NULp;
208  loc_defined_at = NULp;
209  }
// Empties the list: deleting 'head' also deletes all nodes linked behind it
// (Header's destructor deletes its successor); the stored location is freed.
210  void reset() {
211  delete head;
212  head = tail = NULp;
213  free(loc_defined_at);
214  loc_defined_at = NULp;
215  }
217 
// Starts the list with 'header' (list has to be empty) and stores a copy of
// 'location'.
218  void set_first_header(Header *header, const char *location) {
219  aisc_assert(!head);
220  head = tail = header;
221  loc_defined_at = strdup(location);
222  }
// Links 'header' behind the current tail (list must not be empty).
223  void append(Header *header) {
224  aisc_assert(head); // use set_first_header()
225  tail->set_next(header);
226  tail = header;
227  }
228 
229  const Header *first_header() const { return head; }
230  const char *defined_at() const { return loc_defined_at; }
231 };
232 
// Builds a Token named 'key' from 'block' and expects the closing brace
// afterwards; reports empty braces if there is no block.
// Does nothing and returns NULp if error_flag is already set.
// NOTE(review): the declaration of 'block' is not part of this listing (the
// embedded numbering skips 237); presumably it is the block parsed from the
// brace content -- confirm against the original file.
233 Token *Parser::parseBrace(const char *& in, const char *key) {
234  Token *res = NULp;
235  if (!error_flag) {
// remember where the brace was opened (currentLocation() returns a static buffer)
236  char *openLoc = strdup(currentLocation(in));
238 
239  if (block) {
240  res = new Token(key, block);
// block was handed to the Token; NULp avoids deleting it below
241  block = NULp;
242  expect_and_skip_closing_brace(in, openLoc);
243  }
244  else {
245  p_err_empty_braces(in);
246  }
247  delete block;
248  free(openLoc);
249  }
250  return res;
251 }
252 
253 TokenList *Parser::parseTokenList(const char *& in, HeaderList& headerList) {
254  TokenList *items = new TokenList;
255  const Header *header = headerList.first_header();
256 
257  get_byte(in);
258 
259  bool reached_end_of_list = false;
260  while (!error_flag && !reached_end_of_list) {
261  TOKEN foundTokenType;
262  char *str = parse_token(in, foundTokenType);
263 
264  if (!error_flag) {
265  switch (foundTokenType) {
266  case TOK_SEMI:
267  case TOK_BRACE_CLOSE:
268  reached_end_of_list = true;
269  break;
270 
271  case TOK_BRACE_OPEN: {
272  Token *sub = parseBrace(in, header ? header->get_key() : "{");
273  if (sub) items->append(sub);
274  break;
275  }
276  case TOK_COMMA:
277  if (!header) p_err_exp_but_saw(in, "string", "','");
278  else get_byte(in);
279  break;
280 
281  case TOK_AT_WORD:
282  if (header != headerList.first_header()) {
283  p_err_ill_atWord(in);
284  }
285  else {
286  if (!str) {
287  p_err_expected(in, "ID behind '@'");
288  }
289  else {
290  headerList.reset();
291  headerList.set_first_header(new Header(str), currentLocation(in));
292  header = headerList.first_header();
293  }
294  while (lastchar == ',' && !error_flag) {
295  get_byte(in);
296  char *str2 = parse_token(in, foundTokenType);
297  if (foundTokenType != TOK_AT_WORD) p_err_exp_atWord(in);
298  else headerList.append(new Header(str2));
299  free(str2);
300  }
301  if (!error_flag) expect_and_skip(in, ';');
302  if (!error_flag) {
303  aisc_assert(headerList.first_header()->get_key());
304  reached_end_of_list = true;
305  }
306  }
307  break;
308 
309  case TOK_WORD: {
310  Token *new_token = NULp;
311  if (header) {
312  new_token = new Token(header->get_key(), str);
313  expect_line_terminator(in);
314  }
315  else {
316  char *str2 = parse_token(in, foundTokenType);
317  switch (foundTokenType) {
318  case TOK_BRACE_OPEN: {
319  new_token = parseBrace(in, str);
320  break;
321  }
322  case TOK_WORD:
323  new_token = new Token(str, str2);
324  expect_line_terminator(in);
325  break;
326 
327  case TOK_COMMA:
328  case TOK_SEMI:
329  new_token = new Token(str, "");
330  break;
331 
332  case TOK_AT_WORD: p_err_exp_string_but_saw(in, "'@'"); break;
333  case TOK_BRACE_CLOSE: p_err_exp_string_but_saw(in, "'}'"); break;
334 
335  case TOK_INVALID: aisc_assert(0); break;
336  }
337  free(str2);
338  }
339 
340  aisc_assert(new_token || error_flag);
341 
342  if (new_token) items->append(new_token);
343 
344  if (!error_flag) {
345  if (lastchar == ';') {
346  const Header *missingVal = header ? header->next_header() : NULp;
347  if (missingVal) {
348  char buf[1000];
349  sprintf(buf, "value for @%s", missingVal->get_key());
350  p_err_exp_but_saw(in, buf, "';'");
351  }
352  else reached_end_of_list = true;
353  get_byte(in);
354  }
355  else get_byte(in);
356  }
357  break;
358  }
359 
360  case TOK_INVALID:
361  p_err(in, "Invalid token (internal error)");
362  break;
363  }
364  }
365 
366  if (!error_flag && header) header = header->next_header();
367 
368  free(str);
369  }
370 
371  if (error_flag || items->empty()) {
372  delete items;
373  items = NULp;
374  }
375 
376  return items;
377 }
378 
379 
// NOTE(review): the first lines of this function (signature and the
// declaration of 'block') are not part of this listing; the embedded numbering
// skips 380-381 -- confirm against the original file.
// Headers defined by one token list stay active for the following lists of
// the same block (the same headerList is passed to every parseTokenList call).
382  HeaderList headerList;
383 
// collect token lists until the end of input or a closing brace is reached
384  while ((EOSTR != lastchar) && (lastchar != '}')) {
385  TokenList *list = parseTokenList(in, headerList);
386  if (!error_flag && list) {
387  block->append(list);
388  }
389  }
390 
// an empty block or a block parsed with errors yields NULp
391  if (block->empty() || error_flag) {
392  delete block;
393  block = NULp;
394  }
395 
396  return block;
397 }
398 
399 Code *Parser::parse_program(const char *in, const char *filename) {
400  Code *first_cl = NULp;
401  Code *cl = NULp;
402 
403  set_source(filename, 0);
404 
405  while (lastchar != EOSTR) {
406  skip_over_spaces_and_comments_multiple_lines(in);
407 
408  if (lastchar == EOSTR) break;
409 
410  const char *p = in-1;
411  while ((lastchar != EOSTR) && (lastchar != '\n')) get_byte(in);
412 
413  {
414  Code *hcl = new Code;
415  Code *& next = cl ? cl->next : first_cl;
416  cl = next = hcl;
417  }
418  cl->str = copy_string_part(p, in-2);
419  cl->source = Location(loc.get_linenr(), filename);
420  }
421 
422  return first_cl;
423 }
424 
Location source
Definition: aisc_parser.h:39
TOKEN
Definition: aisc_token.h:18
char * copy_string_part(const char *first, const char *last)
Definition: aisc_inline.h:49
string result
const char * get_path() const
Definition: aisc_location.h:44
void space()
Definition: test_unit.h:414
#define END_STR1
Definition: aisc_inline.h:18
void set_next(Header *header)
Definition: aisc_parser.c:193
Header(const char *key_)
Definition: aisc_parser.c:190
#define ERRBUFFERSIZE
Definition: aisc_parser.c:44
const char * get_key() const
Definition: aisc_parser.c:195
char * str
Definition: aisc_parser.h:37
void append(Token *tok)
Definition: aisc_token.h:98
void set_first_header(Header *header, const char *location)
Definition: aisc_parser.c:218
const Header * next_header() const
Definition: aisc_parser.c:196
void reset()
Definition: aisc_parser.c:210
const char * defined_at() const
Definition: aisc_parser.c:230
static void error(const char *msg)
Definition: mkptypes.cxx:96
char * str
Definition: defines.h:20
void append(Header *header)
Definition: aisc_parser.c:223
Code * next
Definition: aisc_parser.h:36
#define BEG_STR1
Definition: aisc_inline.h:16
va_end(argPtr)
class Code * parse_program(const char *in, const char *filename)
Definition: aisc_parser.c:399
#define PRINT2BUFFER(buffer, bufsize, templat, parg)
Definition: aisc_parser.c:41
TokenListBlock * parseTokenListBlock(const char *&in)
Definition: aisc_parser.c:380
void set_source(const Location &other)
Definition: aisc_parser.h:200
#define BEG_STR2
Definition: aisc_inline.h:17
va_start(argPtr, format)
#define NULp
Definition: cxxforward.h:116
static ED4_block block
Definition: ED4_block.cxx:74
const Header * first_header() const
Definition: aisc_parser.c:229
#define EOSTR
Definition: aisc_inline.h:15
int get_linenr() const
Definition: aisc_location.h:45
#define aisc_assert(cond)
Definition: aisc_def.h:11
bool empty() const
Definition: aisc_token.h:130
#define END_STR2
Definition: aisc_inline.h:19
void append(TokenList *cmd)
Definition: aisc_token.h:131
static int column
Definition: arb_a2ps.c:295
bool empty() const
Definition: aisc_token.h:108