Technorati Tags:
antlr,
c,
parser
Writing a Java parser using
ANTLR is a breeze. And why not! It is written in Java, the default IDE that comes with it is written in Java, and so on. But recently I needed a tool for writing C/C++ parsers. My first choice was to go ahead with YACC or Bison. But managing the generated parser is hard (at least for people who are new to these tools), so I started with ANTLR.
After a few rounds of testing, ANTLR seemed OK to me. But the real problem came when I had to provide my own custom handler for error processing. After some rounds of googling and diving through the documentation, I found the solution.
First create a generic handler: exceptionhandler.h
#pragma once
#include "R2SParser.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Custom recognition-error reporter. It is declared with C linkage when this
 * header is compiled as C++, so the same prototype can be shared between the
 * C++ implementation file and the parser that includes this header.
 *
 * recognizer - the recognizer that raised the error; the implementation reads
 *              the error details from recognizer->state->exception.
 * tokenNames - table of token names indexed by token type; the implementation
 *              checks this for NULL before using it in most branches.
 */
void myDisplayRecognitionError (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames);
#ifdef __cplusplus
}
#endif
Its sample implementation:
exceptionhandler.cpp (taken from
antlr3baserecognizer.c)
#include "exceptionhandler.h"
#include <string>
// Custom error reporter, adapted from displayRecognitionError() in the ANTLR3
// C runtime (antlr3baserecognizer.c). It writes a one-error report to stderr:
// line number, exception type and message, the location/text of the offending
// token or tree node, and finally a description that depends on the kind of
// recognition exception.
//
// recognizer - recognizer that raised the error; details are read from
//              recognizer->state->exception.
// tokenNames - token name table indexed by token type; may be NULL, in which
//              case token type numbers (or generic messages) are printed.
//
// Changes relative to the runtime's version this was copied from:
//  * the token / tree node is checked for NULL *before* it is dereferenced;
//  * the "expected one of" list only prints tokens whose bit is actually set
//    in the expected set, tolerates a NULL tokenNames, and frees the bitset;
//  * leftover debug output to stdout and unused locals were removed.
void myDisplayRecognitionError (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames)
{
    pANTLR3_EXCEPTION ex = recognizer->state->exception;

    // The source-name prefix ("<file>(") that the runtime normally prints is
    // intentionally not emitted here; the report starts with the line number.
    // The format strings are kept exactly as before so the output is unchanged.
    //
    ANTLR3_FPRINTF(stderr, "%d) ", ex->line);
    ANTLR3_FPRINTF(stderr, " : error %d : %s",
                   ex->type,
                   (pANTLR3_UINT8) (ex->message));

    // How the location is reported depends on which kind of recognizer raised
    // the error: a parser stores a token in the exception, a tree parser
    // stores a tree node.
    //
    switch (recognizer->type)
    {
    case ANTLR3_TYPE_PARSER:
    {
        pANTLR3_COMMON_TOKEN theToken = (pANTLR3_COMMON_TOKEN)(ex->token);

        ANTLR3_FPRINTF(stderr, ", at offset %d", ex->charPositionInLine);
        if (theToken != NULL)
        {
            if (theToken->type == ANTLR3_TOKEN_EOF)
            {
                ANTLR3_FPRINTF(stderr, ", at <EOF>");
            }
            else
            {
                // Only ask for the token text once we know the token exists,
                // and guard against a token that has no text.
                //
                pANTLR3_STRING ttext = theToken->toString(theToken);
                ANTLR3_FPRINTF(stderr, "\n near %s\n ", ttext == NULL ? (pANTLR3_UINT8)"<no text for the token>" : ttext->chars);
            }
        }
        break;
    }

    case ANTLR3_TYPE_TREE_PARSER:
    {
        pANTLR3_BASE_TREE theBaseTree = (pANTLR3_BASE_TREE)(ex->token);

        if (theBaseTree != NULL)
        {
            pANTLR3_STRING ttext = theBaseTree->toStringTree(theBaseTree);

            ANTLR3_FPRINTF(stderr, ", at offset %d", theBaseTree->getCharPositionInLine(theBaseTree));
            ANTLR3_FPRINTF(stderr, ", near %s", ttext == NULL ? (pANTLR3_UINT8)"<no text for the tree>" : ttext->chars);
        }
        break;
    }

    default:
        ANTLR3_FPRINTF(stderr, "Base recognizer function displayRecognitionError called by unknown parser type - provide override for this function\n");
        return;
    }

    // Now describe the error itself, based on the exception type.
    //
    switch (ex->type)
    {
    case ANTLR3_UNWANTED_TOKEN_EXCEPTION:
        if (tokenNames == NULL)
        {
            ANTLR3_FPRINTF(stderr, " : Extraneous input...");
        }
        else if (ex->expecting == ANTLR3_TOKEN_EOF)
        {
            ANTLR3_FPRINTF(stderr, " : Extraneous input - expected <EOF>\n");
        }
        else
        {
            ANTLR3_FPRINTF(stderr, " : Extraneous input - expected %s ...\n", tokenNames[ex->expecting]);
        }
        break;

    case ANTLR3_MISSING_TOKEN_EXCEPTION:
        if (tokenNames == NULL)
        {
            ANTLR3_FPRINTF(stderr, " : Missing token (%d)...\n", ex->expecting);
        }
        else if (ex->expecting == ANTLR3_TOKEN_EOF)
        {
            ANTLR3_FPRINTF(stderr, " : Missing <EOF>\n");
        }
        else
        {
            ANTLR3_FPRINTF(stderr, " : Missing %s \n", tokenNames[ex->expecting]);
        }
        break;

    case ANTLR3_RECOGNITION_EXCEPTION:
        ANTLR3_FPRINTF(stderr, " : syntax error...\n");
        break;

    case ANTLR3_MISMATCHED_TOKEN_EXCEPTION:
        if (tokenNames == NULL)
        {
            ANTLR3_FPRINTF(stderr, " : syntax error...\n");
        }
        else if (ex->expecting == ANTLR3_TOKEN_EOF)
        {
            ANTLR3_FPRINTF(stderr, " : expected <EOF>\n");
        }
        else
        {
            ANTLR3_FPRINTF(stderr, " : expected %s ...\n", tokenNames[ex->expecting]);
        }
        break;

    case ANTLR3_NO_VIABLE_ALT_EXCEPTION:
        ANTLR3_FPRINTF(stderr, " : cannot match to any predicted input...\n");
        break;

    case ANTLR3_MISMATCHED_SET_EXCEPTION:
    {
        ANTLR3_UINT32 count   = 0;
        ANTLR3_UINT32 numbits = 0;
        ANTLR3_UINT32 size    = 0;
        pANTLR3_BITSET errBits;

        ANTLR3_FPRINTF(stderr, " : unexpected input...\n expected one of : ");

        // Materialise the set of token types that would have been accepted.
        //
        errBits = antlr3BitsetLoad (ex->expectingSet);
        if (errBits != NULL)
        {
            numbits = errBits->numBits (errBits);
            size    = errBits->size (errBits);
        }

        if (size > 0)
        {
            // Printing the whole set is rarely useful, so stop after 8 entries.
            // There is no token type 0, so start looking at bit 1.
            //
            for (ANTLR3_UINT32 bit = 1; bit < numbits && count < 8 && count < size; bit++)
            {
                // Only report token types that really are in the expected set.
                //
                if (!errBits->isMember (errBits, bit))
                {
                    continue;
                }

                if (tokenNames != NULL && tokenNames[bit] != NULL)
                {
                    ANTLR3_FPRINTF(stderr, "%s%s", count > 0 ? ", " : "", tokenNames[bit]);
                }
                else
                {
                    // No name available: fall back to the token type number.
                    //
                    ANTLR3_FPRINTF(stderr, "%s(%d)", count > 0 ? ", " : "", (int) bit);
                }
                count++;
            }
            ANTLR3_FPRINTF(stderr, "\n");
        }
        else
        {
            ANTLR3_FPRINTF(stderr, "Actually dude, we didn't seem to be expecting anything here, or at least\n");
            ANTLR3_FPRINTF(stderr, "I could not work out what I was expecting, like so many of us these days!\n");
        }

        // antlr3BitsetLoad allocated a fresh bitset; release it.
        //
        if (errBits != NULL)
        {
            errBits->free (errBits);
        }
        break;
    }

    case ANTLR3_EARLY_EXIT_EXCEPTION:
        ANTLR3_FPRINTF(stderr, " : missing elements...\n");
        break;

    default:
        ANTLR3_FPRINTF(stderr, " : syntax not recognized...\n");
        break;
    }
}
Then create a placeholder for the error message (this is a trimmed-down version):
errorstruct.h
#pragma once
#ifndef __ERRORSTRUCT__
#define __ERRORSTRUCT__
/*
 * Holder for a single error message.
 * ErrorMessage  - the structure type itself.
 * pErrorMessage - pointer to an ErrorMessage.
 */
typedef struct errormessage_struct
{
    char* message;  /* message text; may be NULL when no message is set */
} ErrorMessage, *pErrorMessage;
#endif
Now at this point your ANTLR grammar should have this:
// Parser header action: pulls in the error-message structure and the custom
// error handler prototype, and defines ERRORMESSAGE as shorthand for the
// errorMessage member reached through CTX.
@parser::header {
#include "errorstruct.h"
#include "exceptionhandler.h"
#define ERRORMESSAGE CTX->errorMessage
}
// Parser context action: declares an ErrorMessage member named errorMessage.
// NOTE(review): per the ANTLR3 C target, this section is expected to add
// members to the generated parser context structure - confirm against the
// generated code.
@parser::context
{
ErrorMessage errorMessage;
}
// Parser apifuncs action: installs myDisplayRecognitionError as the
// recognizer's displayRecognitionError function and starts with no message
// stored (message = NULL).
@parser::apifuncs {
RECOGNIZER->displayRecognitionError = myDisplayRecognitionError;
ERRORMESSAGE.message = NULL;
}
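And that’s it. Now you can access your error message anywhere through parser->errorMessage.message. (Note that the sample handler shown above only prints to stderr; to make the message available this way, your handler has to store it in errorMessage.message itself.) Similarly, you can add more members to the structure (errormessage_struct in the example above) and then use them anywhere.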