Technorati Tags: antlr,c,parser
Writing a Java parser using ANTLR is a breeze. And why not? ANTLR itself is written in Java, the default IDE that comes with it is written in Java, and so on. But recently I needed a tool for writing C/C++ parsers. My first choice was to go with YACC or Bison, but managing the generated parser is hard (at least for people who are new to those tools), so I started with ANTLR.
After a few rounds of testing, ANTLR seemed OK to me. The real problem came when I had to provide my own custom handler for error processing. After some rounds of googling and digging through the documentation, I found the solution.
First create a generic handler: exceptionhandler.h
#pragma once #include "R2SParser.h" #ifdef __cplusplus extern "C" { #endif void myDisplayRecognitionError (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames); #ifdef __cplusplus } #endifIts sample implementation: exceptionhandler.cpp (taken from antlr3baserecognizer.c)
#include "exceptionhandler.h"
#include <string>

namespace {

/**
 * Reports a mismatched-set error: prints up to eight names of the tokens the
 * recognizer was expecting at the point of failure.
 *
 * Only bits that are actually members of the expected set are listed. The
 * bitset loaded from ex->expectingSet is released before returning.
 *
 * @param ex          The exception being reported.
 * @param tokenNames  Table of token names indexed by token type; may be NULL,
 *                    in which case no names can be listed.
 */
void printExpectedTokenSet (pANTLR3_EXCEPTION ex, pANTLR3_UINT8 * tokenNames)
{
    ANTLR3_FPRINTF(stderr, " : unexpected input...\n expected one of : ");

    // Without a name table there is nothing to print, so do not even load the set.
    pANTLR3_BITSET errBits = (tokenNames != NULL) ? antlr3BitsetLoad(ex->expectingSet) : NULL;
    ANTLR3_UINT32  size    = (errBits != NULL) ? errBits->size(errBits) : 0;

    if (size > 0)
    {
        ANTLR3_UINT32 numbits = errBits->numBits(errBits);
        ANTLR3_UINT32 count   = 0;

        for (ANTLR3_UINT32 bit = 1; bit < numbits && count < 8 && count < size; bit++)
        {
            // Only name tokens that really are in the expected set.
            if (errBits->isMember(errBits, bit) && tokenNames[bit])
            {
                ANTLR3_FPRINTF(stderr, "%s%s", count > 0 ? ", " : "", tokenNames[bit]);
                count++;
            }
        }
        ANTLR3_FPRINTF(stderr, "\n");
    }
    else
    {
        ANTLR3_FPRINTF(stderr, "Actually dude, we didn't seem to be expecting anything here, or at least\n");
        ANTLR3_FPRINTF(stderr, "I could not work out what I was expecting, like so many of us these days!\n");
    }

    // antlr3BitsetLoad hands back a freshly built bitset; release it.
    if (errBits != NULL)
    {
        errBits->free(errBits);
    }
}

} // namespace

/**
 * Custom recognition-error reporter, adapted from the stock handler in
 * antlr3baserecognizer.c. Writes a description of the pending exception to
 * stderr: line number, error type and message, where the error occurred, and
 * what the recognizer was expecting.
 *
 * @param recognizer  Recognizer whose state->exception describes the error.
 *                    Nothing is printed if it, its state, or the exception is NULL.
 * @param tokenNames  Table of token names indexed by token type; may be NULL,
 *                    in which case generic messages are printed instead.
 */
void myDisplayRecognitionError (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames)
{
    //====================================
    if (recognizer == NULL || recognizer->state == NULL || recognizer->state->exception == NULL)
    {
        return;
    }

    pANTLR3_EXCEPTION ex = recognizer->state->exception;

    // The stream-name prefix of the stock handler is left disabled here, which
    // is why the line number below is followed by a lone ')'.
    //
    /*if (ex->streamName == NULL)
    {
        if (((pANTLR3_COMMON_TOKEN)(ex->token))->type == ANTLR3_TOKEN_EOF)
        {
            ANTLR3_FPRINTF(stderr, "-end of input-(");
        }
        else
        {
            ANTLR3_FPRINTF(stderr, "-unknown source-(");
        }
    }
    else
    {
        pANTLR3_STRING ftext = ex->streamName->to8(ex->streamName);
        ANTLR3_FPRINTF(stderr, "%s(", ftext->chars);
    }*/

    // Next comes the line number, then the error type and message.
    //
    ANTLR3_FPRINTF(stderr, "%d) ", ex->line);
    ANTLR3_FPRINTF(stderr, " : error %d : %s", ex->type,
                   ex->message != NULL ? (pANTLR3_UINT8)(ex->message) : (pANTLR3_UINT8)"");

    // How we determine the next piece is dependent on which thing raised the
    // error.
    //
    switch (recognizer->type)
    {
    case ANTLR3_TYPE_PARSER:
        {
            pANTLR3_COMMON_TOKEN theToken = (pANTLR3_COMMON_TOKEN)(ex->token);

            ANTLR3_FPRINTF(stderr, ", at offset %d", ex->charPositionInLine);
            if (theToken != NULL)
            {
                if (theToken->type == ANTLR3_TOKEN_EOF)
                {
                    ANTLR3_FPRINTF(stderr, ", at <EOF>");
                }
                else
                {
                    // Ask for the text only once we know the token exists, and
                    // guard against null text in a token.
                    //
                    pANTLR3_STRING ttext = theToken->toString(theToken);
                    ANTLR3_FPRINTF(stderr, "\n near %s\n ",
                                   ttext == NULL ? (pANTLR3_UINT8)"<no text for the token>" : ttext->chars);
                }
            }
        }
        break;

    case ANTLR3_TYPE_TREE_PARSER:
        {
            pANTLR3_BASE_TREE theBaseTree = (pANTLR3_BASE_TREE)(ex->token);

            if (theBaseTree != NULL)
            {
                pANTLR3_STRING ttext = theBaseTree->toStringTree(theBaseTree);

                ANTLR3_FPRINTF(stderr, ", at offset %d", theBaseTree->getCharPositionInLine(theBaseTree));
                ANTLR3_FPRINTF(stderr, ", near %s",
                               ttext == NULL ? (pANTLR3_UINT8)"<no text for the token>" : ttext->chars);
            }
        }
        break;

    default:
        ANTLR3_FPRINTF(stderr, "Base recognizer function displayRecognitionError called by unknown parser type - provide override for this function\n");
        return;
    }

    // Finally, say what was wrong and, where known, what was expected instead.
    //
    switch (ex->type)
    {
    case ANTLR3_UNWANTED_TOKEN_EXCEPTION:
        if (tokenNames == NULL)
        {
            ANTLR3_FPRINTF(stderr, " : Extraneous input...");
        }
        else if (ex->expecting == ANTLR3_TOKEN_EOF)
        {
            ANTLR3_FPRINTF(stderr, " : Extraneous input - expected <EOF>\n");
        }
        else
        {
            ANTLR3_FPRINTF(stderr, " : Extraneous input - expected %s ...\n", tokenNames[ex->expecting]);
        }
        break;

    case ANTLR3_MISSING_TOKEN_EXCEPTION:
        if (tokenNames == NULL)
        {
            ANTLR3_FPRINTF(stderr, " : Missing token (%d)...\n", ex->expecting);
        }
        else if (ex->expecting == ANTLR3_TOKEN_EOF)
        {
            ANTLR3_FPRINTF(stderr, " : Missing <EOF>\n");
        }
        else
        {
            ANTLR3_FPRINTF(stderr, " : Missing %s \n", tokenNames[ex->expecting]);
        }
        break;

    case ANTLR3_RECOGNITION_EXCEPTION:
        ANTLR3_FPRINTF(stderr, " : syntax error...\n");
        break;

    case ANTLR3_MISMATCHED_TOKEN_EXCEPTION:
        if (tokenNames == NULL)
        {
            ANTLR3_FPRINTF(stderr, " : syntax error...\n");
        }
        else if (ex->expecting == ANTLR3_TOKEN_EOF)
        {
            ANTLR3_FPRINTF(stderr, " : expected <EOF>\n");
        }
        else
        {
            ANTLR3_FPRINTF(stderr, " : expected %s ...\n", tokenNames[ex->expecting]);
        }
        break;

    case ANTLR3_NO_VIABLE_ALT_EXCEPTION:
        ANTLR3_FPRINTF(stderr, " : cannot match to any predicted input...\n");
        break;

    case ANTLR3_MISMATCHED_SET_EXCEPTION:
        printExpectedTokenSet(ex, tokenNames);
        break;

    case ANTLR3_EARLY_EXIT_EXCEPTION:
        ANTLR3_FPRINTF(stderr, " : missing elements...\n");
        break;

    default:
        ANTLR3_FPRINTF(stderr, " : syntax not recognized...\n");
        break;
    }
    //====================================
}
Then create a place holder for error message (this is a trimmed down version): errorstruct.h
#pragma once #ifndef __ERRORSTRUCT__ #define __ERRORSTRUCT__ struct errormessage_struct { char* message; }; typedef struct errormessage_struct ErrorMessage; typedef ErrorMessage* pErrorMessage; #endifNow at this point your ANTLR grammar should have this:
// Pull the error-message holder and the custom handler into the parser's
// header section, and define a shorthand for the per-context message slot.
// Each preprocessor directive has to sit on its own line.
@parser::header {
    #include "errorstruct.h"
    #include "exceptionhandler.h"
    #define ERRORMESSAGE CTX->errorMessage
}

// Add an ErrorMessage field to the parser context.
@parser::context {
    ErrorMessage errorMessage;
}

// Install the custom error reporter on the recognizer and start with no
// message stored.
@parser::apifuncs {
    RECOGNIZER->displayRecognitionError = myDisplayRecognitionError;
    ERRORMESSAGE.message = NULL;
}
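And that’s it. Now you can access your error message anywhere, like this: parser->errorMessage.message. (Note that the sample handler above only prints to stderr; it never assigns errorMessage.message, so you have to store the text there yourself if you want to read it back later.) Similarly, you can add member functions to the structure (in the above example, errormessage_struct) and then use them anywhere.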
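One point worth noting is that with this approach you keep the free threading that is built into the code generation and the runtime. Because errorMessage lives in the parser context, you get one errorMessage per parser instance, and therefore one per thread when each thread uses its own parser.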
References:
References: