package antlr;
/* ANTLR Translator Generator
* Project led by Terence Parr at http://www.cs.usfca.edu
* Software rights: http://www.antlr.org/license.html
*
* $Id: //depot/code/org.antlr/release/antlr-2.7.7/antlr/CppCodeGenerator.java#2 $
*/
// C++ code generator by Pete Wells: pete@yamuna.demon.co.uk
// #line generation contributed by: Ric Klaren <klaren@cs.utwente.nl>
import java.util.
Enumeration;
import java.util.
Hashtable;
import antlr.collections.impl.
BitSet;
import antlr.collections.impl.
Vector;
import java.io.
PrintWriter; //SAS: changed for proper text file io
import java.io.
IOException;
import java.io.
FileWriter;
/** Generate MyParser.cpp, MyParser.hpp, MyLexer.cpp, MyLexer.hpp
* and MyParserTokenTypes.hpp
*/
public class
CppCodeGenerator extends
CodeGenerator {
// Extra debug tracing for this generator (separate from CodeGenerator.DEBUG_CODE_GENERATOR).
boolean DEBUG_CPP_CODE_GENERATOR = false;
// Depth of nested syntactic-predicate generation; non-zero while inside one.
protected int syntacticPredLevel = 0;
// Are we generating ASTs (for parsers and tree parsers) right now?
protected boolean genAST = false;
// Are we saving the text consumed (for lexers) right now?
protected boolean saveText = false;
// Generate #line directives in the emitted C++ so compiler errors map back to the grammar.
protected boolean genHashLines = true;
// Suppress generation of constructors when true.
protected boolean noConstructors = false;
// Current line number in the generated output; kept in sync by _print/_println/println.
protected int outputLine;
// Name of the file currently being written (used by genLineNo2()).
protected String outputFile;
// Grammar parameters set up to handle different grammar classes.
// These are used to get instanceof tests out of code generation
boolean usingCustomAST = false;
String labeledElementType;
String labeledElementASTType; // mostly the same as labeledElementType except in parsers
String labeledElementASTInit;
String labeledElementInit;
String commonExtraArgs;
String commonExtraParams;
String commonLocalVars;
String lt1Value;
String exceptionThrown;
String throwNoViable;
// Tracks the rule being generated. Used for mapTreeId
RuleBlock currentRule;
// Tracks the rule or labeled subrule being generated. Used for AST generation.
String currentASTResult;
// Mapping between the ids used in the current alt, and the
// names of variables used to represent their AST values.
Hashtable treeVariableMap = new Hashtable();
/** Used to keep track of which AST variables have been defined in a rule
 * (except for the #rule_name and #rule_name_in var's)
 */
Hashtable declaredASTVariables = new Hashtable();
// Count of unnamed generated variables
int astVarNumber = 1;
// Special sentinel value used to mark duplicates in treeVariableMap
// (identity-compared, hence the explicit new String()).
protected static final String NONUNIQUE = new String();
// Maximum number of cases emitted as a C++ switch; larger sets fall back to if-chains.
public static final int caseSizeThreshold = 127; // ascii is max
// Semantic-predicate strings collected for the debugging parser.
private Vector semPreds;
// Heterogeneous AST types referenced by the grammar; these need to be
// registered with the ASTFactory of the generated parser.
private Vector astTypes;
// Namespace macro expansions used throughout the emitted C++.
private static String namespaceStd = "ANTLR_USE_NAMESPACE(std)";
private static String namespaceAntlr = "ANTLR_USE_NAMESPACE(antlr)";
// Optional C++ namespace to wrap the generated code in.
private static NameSpace nameSpace = null;
// Header-action keys for user-supplied #include sections.
private static final String preIncludeCpp = "pre_include_cpp";
private static final String preIncludeHpp = "pre_include_hpp";
private static final String postIncludeCpp = "post_include_cpp";
private static final String postIncludeHpp = "post_include_hpp";
/** Create a C++ code-generator using the given Grammar.
 * The caller must still call setTool, setBehavior, and setAnalyzer
 * before generating code.
 */
public CppCodeGenerator() {
    super();
    // Use the C++ character formatter so literals are escaped C++-style.
    charFormatter = new CppCharFormatter();
}
/** Record a semantic-predicate string for the debugging parser.
 * The collected strings are later emitted as an array of predicate
 * names; this should only be called when the debug option is on.
 * @param predicate the predicate text to record
 * @return the index at which the predicate was stored
 */
protected int addSemPred(String predicate) {
    semPreds.appendElement(predicate);
    return semPreds.size() - 1;
}
/** Abort the tool run if any error has been reported so far. */
public void exitIfError() {
    if (!antlrTool.hasError())
        return;
    antlrTool.fatalError("Exiting due to errors.");
}
/** Count the newline characters in a string.
 * @param s the text to scan (must not be null)
 * @return the number of '\n' characters contained in s
 */
protected int countLines(String s) {
    int lines = 0;
    // Jump from newline to newline instead of inspecting every char.
    for (int pos = s.indexOf('\n'); pos != -1; pos = s.indexOf('\n', pos + 1)) {
        lines++;
    }
    return lines;
}
/** Output a String to the currentOutput stream while keeping the
 * output line counter in sync. Ignored if the string is null.
 * @param s The string to output
 */
protected void _print(String s) {
    if (s == null)
        return;
    outputLine += countLines(s);
    currentOutput.print(s);
}
/** Print an action without leading tabs, attempting to preserve the
 * current indentation level for multi-line actions, and keep the
 * output line counter in sync. Ignored if the string is null.
 * @param s The action string to output
 */
protected void _printAction(String s) {
    if (s == null)
        return;
    // +1: super._printAction terminates the action with a newline.
    outputLine += countLines(s) + 1;
    super._printAction(s);
}
/** Print an action stored in a token, bracketed by #line directives
 * so C++ compiler messages point back at the grammar file.
 * @param t the token holding the action text; ignored if null
 */
public void printAction(Token t) {
    if (t == null)
        return;
    genLineNo(t.getLine());
    printTabs();
    String translated =
        processActionForSpecialSymbols(t.getText(), t.getLine(), null, null);
    _printAction(translated);
    genLineNo2();
}
/** Print a header action bracketed by #line directives, processing any
 * tree-construction symbols within it.
 * @param name The name of the header part
 */
public void printHeaderAction(String name) {
    Token a = (antlr.Token)behavior.headerActions.get(name);
    if (a == null)
        return;
    genLineNo(a.getLine());
    String translated =
        processActionForSpecialSymbols(a.getText(), a.getLine(), null, null);
    println(translated);
    genLineNo2();
}
/** Output a String followed by a newline to the currentOutput stream,
 * keeping the output line counter in sync. Ignored if the string is null.
 * @param s The string to output
 */
protected void _println(String s) {
    if (s == null)
        return;
    // +1 for the newline println itself appends.
    outputLine += countLines(s) + 1;
    currentOutput.println(s);
}
/** Output tab indent followed by a String followed by newline to the
 * currentOutput stream, keeping the output line counter in sync.
 * Ignored if the string is null.
 * @param s The string to output
 */
protected void println(String s) {
    if (s == null)
        return;
    printTabs();
    outputLine += countLines(s) + 1;
    currentOutput.println(s);
}
/** Emit a #line directive pointing at the grammar file, when the
 * genHashLines option is on.
 * @param line the grammar line number; 0 is bumped to 1
 */
public void genLineNo(int line) {
    if (line == 0)
        line++;
    if (genHashLines) {
        String grammarName = antlrTool.fileMinusPath(antlrTool.grammarFile);
        _println("#line " + line + " \"" + grammarName + "\"");
    }
}
/** Emit a #line directive for a grammar element's source line
 * (no-op when el is null). */
public void genLineNo(GrammarElement el) {
    if (el == null)
        return;
    genLineNo(el.getLine());
}
/** Emit a #line directive for a token's source line
 * (no-op when t is null). */
public void genLineNo(Token t) {
    if (t == null)
        return;
    genLineNo(t.getLine());
}
/** Emit a #line directive that resynchronizes the C++ compiler with the
 * generated file itself (i.e. the line following this directive). */
public void genLineNo2() {
    if (genHashLines)
        _println("#line " + (outputLine + 1) + " \"" + outputFile + "\"");
}
/// Bounds-safe isDigit: false when i is at or past the end of s.
private boolean charIsDigit(String s, int i) {
    if (i >= s.length())
        return false;
    return Character.isDigit(s.charAt(i));
}
/** Normalize a string coming from antlr's lexer. E.g. translate java
* escapes to values. Check their size (multibyte) bomb out if they are
* multibyte (bit crude). Then reescape to C++ style things.
* Used to generate strings for match() and matchRange()
* @param lit the literal string
* @param isCharLiteral if it's for a character literal
* (enforced to be one length) and enclosed in '
* FIXME: bombing out on mb chars. Should be done in Lexer.
* FIXME: this is another horrible hack.
* FIXME: life would be soooo much easier if the stuff from the lexer was
* normalized in some way.
*/
private
String convertJavaToCppString(
String lit, boolean
isCharLiteral )
{
// System.out.println("convertJavaToCppLiteral: "+lit);
String ret = new
String();
String s =
lit;
int
i = 0;
int
val = 0;
if(
isCharLiteral ) // verify & strip off quotes
{
if( !
lit.
startsWith("'") || !
lit.
endsWith("'") )
antlrTool.
error("Invalid character literal: '"+
lit+"'");
}
else
{
if( !
lit.
startsWith("\"") || !
lit.
endsWith("\"") )
antlrTool.
error("Invalid character string: '"+
lit+"'");
}
s =
lit.
substring(1,
lit.
length()-1);
String prefix="";
int
maxsize = 255;
if(
grammar instanceof
LexerGrammar )
{
// vocab size seems to be 1 bigger than it actually is
maxsize = ((
LexerGrammar)
grammar).
charVocabulary.
size() - 1;
if(
maxsize > 255 )
prefix= "L";
}
// System.out.println("maxsize "+maxsize+" prefix "+prefix);
while (
i <
s.
length() )
{
if(
s.
charAt(
i) == '\\' )
{
if(
s.
length() ==
i+1 )
antlrTool.
error("Invalid escape in char literal: '"+
lit+"' looking at '"+
s.
substring(
i)+"'");
// deal with escaped junk
switch (
s.
charAt(
i+1) ) {
case 'a' :
val = 7;
i += 2;
break;
case 'b' :
val = 8;
i += 2;
break;
case 't' :
val = 9;
i += 2;
break;
case 'n' :
val = 10;
i += 2;
break;
case 'f' :
val = 12;
i += 2;
break;
case 'r' :
val = 13;
i += 2;
break;
case '"' :
case '\'' :
case '\\' :
val =
s.
charAt(
i+1);
i += 2;
break;
case 'u' :
// Unicode char \u1234
if(
i+5 <
s.
length() )
{
val =
Character.
digit(
s.
charAt(
i+2), 16) * 16 * 16 * 16 +
Character.
digit(
s.
charAt(
i+3), 16) * 16 * 16 +
Character.
digit(
s.
charAt(
i+4), 16) * 16 +
Character.
digit(
s.
charAt(
i+5), 16);
i += 6;
}
else
antlrTool.
error("Invalid escape in char literal: '"+
lit+"' looking at '"+
s.
substring(
i)+"'");
break;
case '0' : // \123
case '1' :
case '2' :
case '3' :
if(
charIsDigit(
s,
i+2) )
{
if(
charIsDigit(
s,
i+3) )
{
val = (
s.
charAt(
i+1)-'0')*8*8 + (
s.
charAt(
i+2)-'0')*8 +
(
s.
charAt(
i+3)-'0');
i += 4;
}
else
{
val = (
s.
charAt(
i+1)-'0')*8 + (
s.
charAt(
i+2)-'0');
i += 3;
}
}
else
{
val =
s.
charAt(
i+1)-'0';
i += 2;
}
break;
case '4' :
case '5' :
case '6' :
case '7' :
if (
charIsDigit(
s,
i+2) )
{
val = (
s.
charAt(
i+1)-'0')*8 + (
s.
charAt(
i+2)-'0');
i += 3;
}
else
{
val =
s.
charAt(
i+1)-'0';
i += 2;
}
default:
antlrTool.
error("Unhandled escape in char literal: '"+
lit+"' looking at '"+
s.
substring(
i)+"'");
val = 0;
}
}
else
val =
s.
charAt(
i++);
if(
grammar instanceof
LexerGrammar )
{
if(
val >
maxsize ) // abort if too big
{
String offender;
if( ( 0x20 <=
val ) && (
val < 0x7F ) )
offender =
charFormatter.
escapeChar(
val,true);
else
offender = "0x"+
Integer.
toString(
val,16);
antlrTool.
error("Character out of range in "+(
isCharLiteral?"char literal":"string constant")+": '"+
s+"'");
antlrTool.
error("Vocabulary size: "+
maxsize+" Character "+
offender);
}
}
if(
isCharLiteral )
{
// we should be at end of char literal here..
if(
i !=
s.
length() )
antlrTool.
error("Invalid char literal: '"+
lit+"'");
if(
maxsize <= 255 )
{
if ( (
val <= 255) && (
val & 0x80) != 0 )
// the joys of sign extension in the support lib *cough*
// actually the support lib needs to be fixed but that's a bit
// hairy too.
ret = "static_cast<unsigned char>('"+
charFormatter.
escapeChar(
val,true)+"')";
else
ret = "'"+
charFormatter.
escapeChar(
val,true)+"'";
}
else
{
// so wchar_t is some implementation defined int like thing
// so this may even lead to having 16 bit or 32 bit cases...
// I smell some extra grammar options in the future :(
ret = "L'"+
charFormatter.
escapeChar(
val,true)+"'";
}
}
else
ret +=
charFormatter.
escapeChar(
val,true);
}
if( !
isCharLiteral )
ret =
prefix+"\""+
ret+"\"";
return
ret;
}
/** Generate the parser, lexer, treeparser, and token types in C++
 */
public void gen() {
    try {
        // First pass: generate code for every grammar.
        Enumeration grammarIter = behavior.grammars.elements();
        while (grammarIter.hasMoreElements()) {
            Grammar g = (Grammar)grammarIter.nextElement();
            if (g.debuggingOutput) {
                antlrTool.error(g.getFilename() + ": C++ mode does not support -debug");
            }
            // Wire the components together before generating.
            g.setGrammarAnalyzer(analyzer);
            g.setCodeGenerator(this);
            analyzer.setGrammar(g);
            // To get right overloading behavior across hetrogeneous grammars
            setupGrammarParameters(g);
            g.generate();
            exitIfError();
        }
        // Second pass: emit token files for each writable token manager
        // (some of which are lexers).
        Enumeration tmIter = behavior.tokenManagers.elements();
        while (tmIter.hasMoreElements()) {
            TokenManager tm = (TokenManager)tmIter.nextElement();
            if (!tm.isReadOnly()) {
                // The C++ token types must be generated before
                // genTokenInterchange so that labels are set on string literals.
                genTokenTypes(tm);
                // Write the token manager tokens as plain text
                genTokenInterchange(tm);
            }
            exitIfError();
        }
    }
    catch (IOException e) {
        antlrTool.reportException(e, null);
    }
}
/** Generate code for the given grammar element.
 * Emits the user action, translated for special symbols (#rule, #id, ...),
 * wrapped in a guessing==0 guard when syntactic predicates exist.
 * @param action The {...} action to generate
 */
public void gen(ActionElement action) {
    if ( DEBUG_CODE_GENERATOR || DEBUG_CPP_CODE_GENERATOR )
        System.out.println("genAction("+action+")");
    if ( action.isSemPred ) {
        // Semantic predicates are generated separately.
        genSemPred(action.actionText, action.line);
    }
    else {
        // Real actions must not run while guessing during a syntactic predicate.
        if ( grammar.hasSyntacticPredicate ) {
            println("if ( inputState->guessing==0 ) {");
            tabs++;
        }
        // Collects info about #rule references/assignments during translation.
        ActionTransInfo tInfo = new ActionTransInfo();
        String actionStr = processActionForSpecialSymbols(action.actionText,
            action.getLine(), currentRule, tInfo);
        if ( tInfo.refRuleRoot!=null ) {
            // Somebody referenced "#rule", make sure translated var is valid
            // assignment to #rule is left as a ref also, meaning that assignments
            // with no other refs like "#rule = foo();" still forces this code to be
            // generated (unnecessarily).
            println(tInfo.refRuleRoot + " = "+labeledElementASTType+"(currentAST.root);");
        }
        // dump the translated action, bracketed with #line directives
        genLineNo(action);
        printAction(actionStr);
        genLineNo2();
        if ( tInfo.assignToRoot ) {
            // Somebody did a "#rule=", reset internal currentAST.root
            println("currentAST.root = "+tInfo.refRuleRoot+";");
            // reset the child pointer too to be last sibling in sibling list
            // now use if else in stead of x ? y : z to shut CC 4.2 up.
            println("if ( "+tInfo.refRuleRoot+"!="+labeledElementASTInit+" &&");
            tabs++;
            println(tInfo.refRuleRoot+"->getFirstChild() != "+labeledElementASTInit+" )");
            println(" currentAST.child = "+tInfo.refRuleRoot+"->getFirstChild();");
            tabs--;
            println("else");
            tabs++;
            println("currentAST.child = "+tInfo.refRuleRoot+";");
            tabs--;
            println("currentAST.advanceChildToEnd();");
        }
        if ( grammar.hasSyntacticPredicate ) {
            // close the guessing==0 guard
            tabs--;
            println("}");
        }
    }
}
/** Generate code for the given grammar element.
 * @param blk The "x|y|z|..." block to generate
 */
public void gen(AlternativeBlock blk) {
    if (DEBUG_CODE_GENERATOR || DEBUG_CPP_CODE_GENERATOR)
        System.out.println("gen(" + blk + ")");
    println("{");
    genBlockPreamble(blk);
    genBlockInitAction(blk);
    // Direct AST construction at this subrule's result while inside it.
    String priorASTResult = currentASTResult;
    if (blk.getLabel() != null) {
        currentASTResult = blk.getLabel();
    }
    boolean ok = grammar.theLLkAnalyzer.deterministic(blk);
    CppBlockFinishingInfo howToFinish = genCommonBlock(blk, true);
    genBlockFinish(howToFinish, throwNoViable);
    println("}");
    // Restore previous AST generation target.
    currentASTResult = priorASTResult;
}
/** Generate code for the given grammar element.
 * Block-end elements are synthesized by the grammar parser to mark the
 * end of a block; no code is emitted for them.
 * @param end The block-end element to generate
 */
public void gen(BlockEndElement end) {
    if (DEBUG_CODE_GENERATOR || DEBUG_CPP_CODE_GENERATOR)
        System.out.println("genRuleEnd(" + end + ")");
}
/** Generate code for the given grammar element.
 * Only called from lexer grammars.
 * @param atom The character literal reference to generate
 */
public void gen(CharLiteralElement atom) {
    if ( DEBUG_CODE_GENERATOR || DEBUG_CPP_CODE_GENERATOR )
        System.out.println("genChar("+atom+")");
    if ( ! (grammar instanceof LexerGrammar) )
        antlrTool.error("cannot ref character literals in grammar: "+atom);
    // Assign matched char to the element's label, if any.
    if ( atom.getLabel() != null ) {
        println(atom.getLabel() + " = " + lt1Value + ";");
    }
    // Temporarily disable text saving if this atom is not auto-generated;
    // restored at the end of this method.
    boolean oldsaveText = saveText;
    saveText = saveText && atom.getAutoGenType() == GrammarElement.AUTO_GEN_NONE;
    // if in lexer and ! on element, save buffer index to kill later
    if ( !saveText || atom.getAutoGenType()==GrammarElement.AUTO_GEN_BANG )
        println("_saveIndex = text.length();");
    // Emit match(...) or matchNot(...) with the C++-escaped literal.
    print(atom.not ? "matchNot(" : "match(");
    _print(convertJavaToCppString(atom.atomText, true ));
    _println(" /* charlit */ );");
    if ( !saveText || atom.getAutoGenType() == GrammarElement.AUTO_GEN_BANG )
        println("text.erase(_saveIndex);"); // kill text atom put in buffer
    saveText = oldsaveText;
}
/** Generate code for the given grammar element.
 * Only called from lexer grammars.
 * @param r The character-range reference to generate
 */
public void gen(CharRangeElement r) {
    if (DEBUG_CODE_GENERATOR || DEBUG_CPP_CODE_GENERATOR)
        System.out.println("genCharRangeElement(" + r.beginText + ".." + r.endText + ")");
    if (!(grammar instanceof LexerGrammar))
        antlrTool.error("cannot ref character range in grammar: " + r);
    if (r.getLabel() != null && syntacticPredLevel == 0) {
        println(r.getLabel() + " = " + lt1Value + ";");
    }
    // When a bang suppresses the text, bracket the match with
    // save-index / erase so the matched chars are dropped again.
    boolean save =
        (grammar instanceof LexerGrammar)
        && (!saveText || r.getAutoGenType() == GrammarElement.AUTO_GEN_BANG);
    if (save)
        println("_saveIndex=text.length();");
    println("matchRange(" + convertJavaToCppString(r.beginText, true)
            + "," + convertJavaToCppString(r.endText, true) + ");");
    if (save)
        println("text.erase(_saveIndex);");
}
/** Generate the lexer C++ files */
public void gen(LexerGrammar g) throws IOException {
    // If debugging, start a fresh sempred vector for this grammar.
    if (g.debuggingOutput)
        semPreds = new Vector();
    if (g.charVocabulary.size() > 256)
        antlrTool.warning(g.getFilename()
            + ": Vocabularies of this size still experimental in C++ mode (vocabulary size now: "
            + g.charVocabulary.size() + ")");
    setGrammar(g);
    if (!(grammar instanceof LexerGrammar)) {
        antlrTool.panic("Internal error generating lexer");
    }
    genBody(g);
    genInclude(g);
}
/** Generate code for the given grammar element.
 * Emits a for(;;) loop with a counter; at least one iteration must match.
 * @param blk The (...)+ block to generate
 */
public void gen(OneOrMoreBlock blk) {
    if ( DEBUG_CODE_GENERATOR || DEBUG_CPP_CODE_GENERATOR )
        System.out.println("gen+("+blk+")");
    String label;   // goto label used to break out of the loop
    String cnt;     // iteration counter variable name
    println("{ // ( ... )+");
    genBlockPreamble(blk);
    if (
    blk.getLabel() != null ) {
        cnt = "_cnt_"+blk.getLabel();
    }
    else {
        cnt = "_cnt" + blk.ID;
    }
    println("int "+cnt+"=0;");
    if ( blk.getLabel() != null ) {
        label = blk.getLabel();
    }
    else {
        label = "_loop" + blk.ID;
    }
    println("for (;;) {");
    tabs++;
    // generate the init action for ()+ ()* inside the loop
    // this allows us to do useful EOF checking...
    genBlockInitAction(blk);
    // Tell AST generation to build subrule result
    String saveCurrentASTResult = currentASTResult;
    if (blk.getLabel() != null) {
        currentASTResult = blk.getLabel();
    }
    boolean ok = grammar.theLLkAnalyzer.deterministic(blk);
    // generate exit test if greedy set to false
    // and an alt is ambiguous with exit branch
    // or when lookahead derived purely from end-of-file
    // Lookahead analysis stops when end-of-file is hit,
    // returning set {epsilon}. Since {epsilon} is not
    // ambig with any real tokens, no error is reported
    // by deterministic() routines and we have to check
    // for the case where the lookahead depth didn't get
    // set to NONDETERMINISTIC (this only happens when the
    // FOLLOW contains real atoms + epsilon).
    boolean generateNonGreedyExitPath = false;
    int nonGreedyExitDepth = grammar.maxk;
    if ( !blk.greedy &&
        blk.exitLookaheadDepth<=grammar.maxk &&
        blk.exitCache[blk.exitLookaheadDepth].containsEpsilon() )
    {
        generateNonGreedyExitPath = true;
        nonGreedyExitDepth = blk.exitLookaheadDepth;
    }
    else if ( !blk.greedy &&
        blk.exitLookaheadDepth==LLkGrammarAnalyzer.NONDETERMINISTIC )
    {
        generateNonGreedyExitPath = true;
    }
    // generate exit test if greedy set to false
    // and an alt is ambiguous with exit branch
    if ( generateNonGreedyExitPath ) {
        if ( DEBUG_CODE_GENERATOR || DEBUG_CPP_CODE_GENERATOR ) {
            System.out.println("nongreedy (...)+ loop; exit depth is "+blk.exitLookaheadDepth);
        }
        String predictExit = getLookaheadTestExpression(blk.exitCache, nonGreedyExitDepth);
        println("// nongreedy exit test");
        // Only allow an early exit once at least one iteration has matched.
        println("if ( "+cnt+">=1 && "+predictExit+") goto "+label+";");
    }
    CppBlockFinishingInfo howToFinish = genCommonBlock(blk, false);
    // On no-viable-alt: exit if we've matched once, otherwise throw.
    genBlockFinish(howToFinish,
        "if ( "+cnt+">=1 ) { goto "+label+"; } else {" + throwNoViable + "}"
    );
    println(cnt+"++;");
    tabs--;
    println("}");
    println(label+":;");
    println("} // ( ... )+");
    // Restore previous AST generation
    currentASTResult = saveCurrentASTResult;
}
/** Generate the parser C++ file */
public void gen(ParserGrammar g) throws IOException {
    // If debugging, start a fresh vector to keep track of sempred
    // strings for this grammar.
    if (g.debuggingOutput)
        semPreds = new Vector();
    setGrammar(g);
    if (!(grammar instanceof ParserGrammar)) {
        antlrTool.panic("Internal error generating parser");
    }
    genBody(g);
    genInclude(g);
}
/** Generate code for the given grammar element.
 * Emits the rule invocation plus label/AST/return-value plumbing.
 * @param rr The rule-reference to generate
 */
public void gen(RuleRefElement rr)
{
    if ( DEBUG_CODE_GENERATOR || DEBUG_CPP_CODE_GENERATOR )
        System.out.println("genRR("+rr+")");
    RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rr.targetRule);
    if ( rs == null || !rs.isDefined() )
    {
        // Is this redundant???
        antlrTool.error("Rule '" + rr.targetRule + "' is not defined",
            grammar.getFilename(),
            rr.getLine(), rr.getColumn());
        return;
    }
    if (!(rs instanceof RuleSymbol))
    {
        // Is this redundant???
        antlrTool.error("'" + rr.targetRule + "' does not name a grammar rule",
            grammar.getFilename(),
            rr.getLine(), rr.getColumn());
        return;
    }
    genErrorTryForElement(rr);
    // AST value for labeled rule refs in tree walker.
    // This is not AST construction; it is just the input tree node value.
    if ( grammar instanceof TreeWalkerGrammar &&
        rr.getLabel() != null &&
        syntacticPredLevel == 0 )
    {
        println(rr.getLabel() + " = (_t == ASTNULL) ? "+labeledElementASTInit+" : "+lt1Value+";");
    }
    // if in lexer and ! on rule ref or alt or rule, save buffer index to
    // kill later
    if ( grammar instanceof LexerGrammar && (!saveText||rr.getAutoGenType()==GrammarElement.AUTO_GEN_BANG) )
    {
        println("_saveIndex = text.length();");
    }
    // Process return value assignment if any
    printTabs();
    if ( rr.idAssign != null)
    {
        // Warn if the rule has no return type
        if ( rs.block.returnAction == null)
        {
            antlrTool.warning("Rule '" + rr.targetRule + "' has no return type",
                grammar.getFilename(),
                rr.getLine(), rr.getColumn());
        }
        _print(rr.idAssign + "=");
    } else {
        // Warn about return value if any, but not inside syntactic predicate
        if ( !(grammar instanceof LexerGrammar) &&
            syntacticPredLevel == 0 &&
            rs.block.returnAction != null)
        {
            antlrTool.warning("Rule '" + rr.targetRule + "' returns a value",
                grammar.getFilename(),
                rr.getLine(), rr.getColumn());
        }
    }
    // Call the rule
    GenRuleInvocation(rr);
    // if in lexer and ! on element or alt or rule, save buffer index to kill later
    if ( grammar instanceof LexerGrammar && (!saveText||rr.getAutoGenType()==GrammarElement.AUTO_GEN_BANG) ) {
        println("text.erase(_saveIndex);");
    }
    // if not in a syntactic predicate
    if (syntacticPredLevel == 0)
    {
        // Label/AST bookkeeping must be skipped while guessing.
        boolean doNoGuessTest = (
            grammar.hasSyntacticPredicate &&
            (
                grammar.buildAST && rr.getLabel() != null ||
                (genAST && rr.getAutoGenType() == GrammarElement.AUTO_GEN_NONE)
            )
        );
        if (doNoGuessTest) {
            println("if (inputState->guessing==0) {");
            tabs++;
        }
        if (grammar.buildAST && rr.getLabel() != null)
        {
            // always gen variable for rule return on labeled rules
            // RK: hmm do I know here if the returnAST needs a cast ?
            println(rr.getLabel() + "_AST = returnAST;");
        }
        if (genAST)
        {
            switch (rr.getAutoGenType())
            {
            case GrammarElement.AUTO_GEN_NONE:
                // Hook the called rule's AST into the current AST.
                if( usingCustomAST )
                    println("astFactory->addASTChild(currentAST, "+namespaceAntlr+"RefAST(returnAST));");
                else
                    println("astFactory->addASTChild( currentAST, returnAST );");
                break;
            case GrammarElement.AUTO_GEN_CARET:
                // FIXME: RK: I'm not so sure this should be an error..
                // I think it might actually work and be usefull at times.
                antlrTool.error("Internal: encountered ^ after rule reference");
                break;
            default:
                break;
            }
        }
        // if a lexer and labeled, Token label defined at rule level, just set it here
        if ( grammar instanceof LexerGrammar && rr.getLabel() != null )
        {
            println(rr.getLabel()+"=_returnToken;");
        }
        if (doNoGuessTest)
        {
            tabs--;
            println("}");
        }
    }
    genErrorCatchForElement(rr);
}
/** Generate code for the given grammar element.
 * @param atom The string-literal reference to generate
 */
public void gen(StringLiteralElement atom) {
    if (DEBUG_CODE_GENERATOR || DEBUG_CPP_CODE_GENERATOR)
        System.out.println("genString(" + atom + ")");
    // Assign the matched value to the label, unless inside a syntactic predicate.
    if (atom.getLabel() != null && syntacticPredLevel == 0) {
        println(atom.getLabel() + " = " + lt1Value + ";");
    }
    // AST construction for this element.
    genElementAST(atom);
    // A bang on the literal suppresses text saving while matching it.
    boolean priorSaveText = saveText;
    saveText = saveText && atom.getAutoGenType() == GrammarElement.AUTO_GEN_NONE;
    genMatch(atom);
    saveText = priorSaveText;
    // Advance the tree cursor when walking trees.
    if (grammar instanceof TreeWalkerGrammar) {
        println("_t = _t->getNextSibling();");
    }
}
/** Generate code for the given grammar element.
 * @param r The token-range reference to generate
 */
public void gen(TokenRangeElement r) {
    genErrorTryForElement(r);
    // Assign the matched token to the label, unless inside a syntactic predicate.
    if (r.getLabel() != null && syntacticPredLevel == 0) {
        println(r.getLabel() + " = " + lt1Value + ";");
    }
    // AST construction for this element.
    genElementAST(r);
    // Emit the range match.
    println("matchRange(" + r.beginText + "," + r.endText + ");");
    genErrorCatchForElement(r);
}
/** Generate code for the given grammar element.
 * @param atom The token-reference to generate
 */
public void gen(TokenRefElement atom) {
    if (DEBUG_CODE_GENERATOR || DEBUG_CPP_CODE_GENERATOR)
        System.out.println("genTokenRef(" + atom + ")");
    if (grammar instanceof LexerGrammar) {
        antlrTool.panic("Token reference found in lexer");
    }
    genErrorTryForElement(atom);
    // Assign Token value to token label variable, unless guessing.
    if (atom.getLabel() != null && syntacticPredLevel == 0) {
        println(atom.getLabel() + " = " + lt1Value + ";");
    }
    // AST construction for this element.
    genElementAST(atom);
    // Emit the match itself.
    genMatch(atom);
    genErrorCatchForElement(atom);
    // Advance the tree cursor when walking trees.
    if (grammar instanceof TreeWalkerGrammar) {
        println("_t = _t->getNextSibling();");
    }
}
/** Generate code for a tree-pattern element: match the root, descend
 * into the children, then restore the tree cursor.
 * @param t the tree element to generate
 */
public void gen(TreeElement t) {
    // save AST cursor
    println(labeledElementType+" __t" + t.ID + " = _t;");
    // If there is a label on the root, then assign that to the variable
    if (t.root.getLabel() != null) {
        println(t.root.getLabel() + " = (_t == "+labeledElementType+"(ASTNULL)) ? "+labeledElementASTInit+" : _t;");
    }
    // check for invalid modifiers ! and ^ on tree element roots
    if ( t.root.getAutoGenType() == GrammarElement.AUTO_GEN_BANG ) {
        antlrTool.error("Suffixing a root node with '!' is not implemented",
            grammar.getFilename(), t.getLine(), t.getColumn());
        t.root.setAutoGenType(GrammarElement.AUTO_GEN_NONE);
    }
    if ( t.root.getAutoGenType() == GrammarElement.AUTO_GEN_CARET ) {
        antlrTool.warning("Suffixing a root node with '^' is redundant; already a root",
            grammar.getFilename(), t.getLine(), t.getColumn());
        t.root.setAutoGenType(GrammarElement.AUTO_GEN_NONE);
    }
    // Generate AST variables
    genElementAST(t.root);
    if (grammar.buildAST) {
        // Save the AST construction state
        println(namespaceAntlr+"ASTPair __currentAST" + t.ID + " = currentAST;");
        // Make the next item added a child of the TreeElement root
        println("currentAST.root = currentAST.child;");
        println("currentAST.child = "+labeledElementASTInit+";");
    }
    // match root
    if ( t.root instanceof WildcardElement ) {
        // Wildcard root only requires a non-null node.
        println("if ( _t == ASTNULL ) throw "+namespaceAntlr+"MismatchedTokenException();");
    }
    else {
        genMatch(t.root);
    }
    // move to list of children
    println("_t = _t->getFirstChild();");
    // walk list of children, generating code for each alternative element
    for (int i=0; i<t.getAlternatives().size(); i++) {
        Alternative a = t.getAlternativeAt(i);
        AlternativeElement e = a.head;
        while ( e != null ) {
            e.generate();
            e = e.next;
        }
    }
    if (grammar.buildAST) {
        // restore the AST construction state to that just after the
        // tree root was added
        println("currentAST = __currentAST" + t.ID + ";");
    }
    // restore AST cursor
    println("_t = __t" + t.ID + ";");
    // move cursor to sibling of tree just parsed
    println("_t = _t->getNextSibling();");
}
/** Generate the tree-parser C++ files */
public void gen(TreeWalkerGrammar g) throws IOException {
    setGrammar(g);
    if (!(grammar instanceof TreeWalkerGrammar)) {
        antlrTool.panic("Internal error generating tree-walker");
    }
    genBody(g);
    genInclude(g);
}
/** Generate code for the given grammar element.
 * @param wc The wildcard element to generate
 */
public void gen(WildcardElement wc) {
    // Variable assignment for labeled elements (skipped while guessing).
    if (wc.getLabel() != null && syntacticPredLevel == 0) {
        println(wc.getLabel() + " = " + lt1Value + ";");
    }
    // AST construction for this element.
    genElementAST(wc);
    // Match anything but EOF.
    if (grammar instanceof TreeWalkerGrammar) {
        println("if ( _t == " + labeledElementASTInit + " ) throw "
                + namespaceAntlr + "MismatchedTokenException();");
    }
    else if (grammar instanceof LexerGrammar) {
        // A bang (or !saveText) means the matched text must be erased
        // again after matching. (The original re-tested
        // "grammar instanceof LexerGrammar" inside this branch twice;
        // the condition is hoisted here once.)
        boolean suppress =
            !saveText || wc.getAutoGenType() == GrammarElement.AUTO_GEN_BANG;
        if (suppress) {
            println("_saveIndex = text.length();");
        }
        println("matchNot(EOF/*_CHAR*/);");
        if (suppress) {
            println("text.erase(_saveIndex);"); // kill text atom put in buffer
        }
    }
    else {
        println("matchNot(" + getValueString(Token.EOF_TYPE) + ");");
    }
    // tack on tree cursor motion if doing a tree walker
    if (grammar instanceof TreeWalkerGrammar) {
        println("_t = _t->getNextSibling();");
    }
}
/** Generate code for the given grammar element.
 * Emits a for(;;) loop; zero iterations is acceptable.
 * @param blk The (...)* block to generate
 */
public void gen(ZeroOrMoreBlock blk) {
    if ( DEBUG_CODE_GENERATOR || DEBUG_CPP_CODE_GENERATOR )
        System.out.println("gen*("+blk+")");
    println("{ // ( ... )*");
    genBlockPreamble(blk);
    String label;   // goto label used to exit the loop
    if ( blk.getLabel() != null ) {
        label = blk.getLabel();
    }
    else {
        label = "_loop" + blk.ID;
    }
    println("for (;;) {");
    tabs++;
    // generate the init action for ()+ ()* inside the loop
    // this allows us to do useful EOF checking...
    genBlockInitAction(blk);
    // Tell AST generation to build subrule result
    String saveCurrentASTResult = currentASTResult;
    if (blk.getLabel() != null) {
        currentASTResult = blk.getLabel();
    }
    boolean ok = grammar.theLLkAnalyzer.deterministic(blk);
    // generate exit test if greedy set to false
    // and an alt is ambiguous with exit branch
    // or when lookahead derived purely from end-of-file
    // Lookahead analysis stops when end-of-file is hit,
    // returning set {epsilon}. Since {epsilon} is not
    // ambig with any real tokens, no error is reported
    // by deterministic() routines and we have to check
    // for the case where the lookahead depth didn't get
    // set to NONDETERMINISTIC (this only happens when the
    // FOLLOW contains real atoms + epsilon).
    boolean generateNonGreedyExitPath = false;
    int nonGreedyExitDepth = grammar.maxk;
    if ( !blk.greedy &&
        blk.exitLookaheadDepth<=grammar.maxk &&
        blk.exitCache[blk.exitLookaheadDepth].containsEpsilon() )
    {
        generateNonGreedyExitPath = true;
        nonGreedyExitDepth = blk.exitLookaheadDepth;
    }
    else if ( !blk.greedy &&
        blk.exitLookaheadDepth==LLkGrammarAnalyzer.NONDETERMINISTIC )
    {
        generateNonGreedyExitPath = true;
    }
    if ( generateNonGreedyExitPath ) {
        if ( DEBUG_CODE_GENERATOR || DEBUG_CPP_CODE_GENERATOR ) {
            System.out.println("nongreedy (...)* loop; exit depth is "+blk.exitLookaheadDepth);
        }
        String predictExit = getLookaheadTestExpression(blk.exitCache, nonGreedyExitDepth);
        println("// nongreedy exit test");
        println("if ("+predictExit+") goto "+label+";");
    }
    CppBlockFinishingInfo howToFinish = genCommonBlock(blk, false);
    // On no-viable-alt simply leave the loop; zero matches is fine.
    genBlockFinish(howToFinish, "goto " + label + ";");
    tabs--;
    println("}");
    println(label+":;");
    println("} // ( ... )*");
    // Restore previous AST generation
    currentASTResult = saveCurrentASTResult;
}
/** Generate an alternative.
 * Saves/restores genAST, saveText and the tree-variable map around the
 * alternative, and wraps it in a try-block when it has an exception spec.
 * @param alt The alternative to generate
 * @param blk The block to which the alternative belongs
 */
protected void genAlt(Alternative alt, AlternativeBlock blk)
{
    // Save the AST generation state, and set it to that of the alt
    boolean savegenAST = genAST;
    genAST = genAST && alt.getAutoGen();
    boolean oldsaveTest = saveText;
    saveText = saveText && alt.getAutoGen();
    // Reset the variable name map for the alternative
    Hashtable saveMap = treeVariableMap;
    treeVariableMap = new Hashtable();
    // Generate try block around the alt for error handling
    if (alt.exceptionSpec != null) {
        println("try { // for error handling");
        tabs++;
    }
    // Generate every element up to the synthesized block end.
    AlternativeElement elem = alt.head;
    while ( !(elem instanceof BlockEndElement) ) {
        elem.generate(); // alt can begin with anything. Ask target to gen.
        elem = elem.next;
    }
    if ( genAST)
    {
        if (blk instanceof RuleBlock)
        {
            // Set the AST return value for the rule
            RuleBlock rblk = (RuleBlock)blk;
            if( usingCustomAST )
                println(rblk.getRuleName() + "_AST = "+labeledElementASTType+"(currentAST.root);");
            else
                println(rblk.getRuleName() + "_AST = currentAST.root;");
        }
        else if (blk.getLabel() != null) {
            // ### future: also set AST value for labeled subrules.
            // println(blk.getLabel() + "_AST = "+labeledElementASTType+"(currentAST.root);");
            antlrTool.warning("Labeled subrules are not implemented",
                grammar.getFilename(), blk.getLine(), blk.getColumn());
        }
    }
    if (alt.exceptionSpec != null)
    {
        // close try block
        tabs--;
        println("}");
        genErrorHandler(alt.exceptionSpec);
    }
    // Restore the saved state.
    genAST = savegenAST;
    saveText = oldsaveTest;
    treeVariableMap = saveMap;
}
/** Generate all the bitsets to be used in the parser or lexer
 * Generate the raw bitset data like "long _tokenSet1_data[] = {...};"
 * and the BitSet object declarations like
 * "BitSet _tokenSet1 = new BitSet(_tokenSet1_data);"
 * Note that most languages do not support object initialization inside a
 * class definition, so other code-generators may have to separate the
 * bitset declarations from the initializations (e.g., put the
 * initializations in the generated constructor instead).
 * @param bitsetList The list of bitsets to generate.
 * @param maxVocabulary Ensure that each generated bitset can contain at
 * least this value.
 * @param prefix string glued in from of bitset names used for namespace
 * qualifications.
 */
protected void genBitsets(Vector bitsetList, int maxVocabulary, String prefix)
{
    TokenManager tm = grammar.tokenManager;
    println("");
    for (int i = 0; i < bitsetList.size(); i++)
    {
        BitSet p = (BitSet)bitsetList.elementAt(i);
        // Ensure that generated BitSet is large enough for vocabulary
        p.growToInclude(maxVocabulary);
        // initialization data
        println(
            "const unsigned long " + prefix + getBitsetName(i) + "_data_" + "[] = { " +
            p.toStringOfHalfWords() +
            " };"
        );
        // Dump the contents of the bitset in readable format...
        String t = "// ";
        for( int j = 0; j < tm.getVocabulary().size(); j++ )
        {
            if ( p.member( j ) )
            {
                if ( (grammar instanceof LexerGrammar) )
                {
                    // only dump out for pure printable ascii.
                    if( ( 0x20 <= j ) && ( j < 0x7F ) && ( j != '\\') )
                        t += charFormatter.escapeChar( j,true)+" ";
                    else
                        t += "0x"+Integer.toString( j,16)+" ";
                }
                else
                    t += tm.getTokenStringAt( j)+" ";
                // flush the comment line before it gets too long
                if( t.length() > 70 )
                {
                    println( t);
                    t = "// ";
                }
            }
        }
        // BUGFIX: the original used reference comparison (t != "// "), which
        // only worked because both assignments used the same interned literal.
        // Compare string *contents* so a partially filled line is printed.
        if ( !t.equals("// ") )
            println( t);
        // BitSet object (p.size()/32 = number of 32-bit half-words of data)
        println(
            "const "+namespaceAntlr+"BitSet " + prefix + getBitsetName(i) + "(" +
            getBitsetName(i) + "_data_," + p.size()/32 + ");"
        );
    }
}
/** Emit the header-file (declaration-only) counterparts of genBitsets():
 * for each bitset, a "static const unsigned long ..._data_[];" array
 * declaration and a "static const BitSet ...;" object declaration.
 * @param bitsetList    bitsets referenced by the generated recognizer
 * @param maxVocabulary each bitset is grown to at least this size
 */
protected void genBitsetsHeader(Vector bitsetList, int maxVocabulary) {
    println("");
    int count = bitsetList.size();
    for (int idx = 0; idx < count; idx++) {
        BitSet bits = (BitSet)bitsetList.elementAt(idx);
        // Ensure that generated BitSet is large enough for vocabulary
        bits.growToInclude(maxVocabulary);
        String name = getBitsetName(idx);
        // raw initialization-data array declaration
        println("static const unsigned long " + name + "_data_" + "[];");
        // BitSet object declaration
        println("static const " + namespaceAntlr + "BitSet " + name + ";");
    }
}
/** Generate the finish of a block, using a combination of the info
 * returned from genCommonBlock() and the action to perform when
 * no alts were taken
 * @param howToFinish The return of genCommonBlock()
 * @param noViableAction What to generate when no alt is taken
 */
private void genBlockFinish(CppBlockFinishingInfo howToFinish, String noViableAction)
{
    // An error clause is only emitted when the common block asked for one
    // AND it actually generated an if-chain or a switch to hang it off of.
    boolean wantErrorClause =
        howToFinish.needAnErrorClause
        && (howToFinish.generatedAnIf || howToFinish.generatedSwitch);
    if (wantErrorClause) {
        // append "else {...}" to an if-chain; a bare "{...}" otherwise
        println(howToFinish.generatedAnIf ? "else {" : "{");
        tabs++;
        println(noViableAction);
        tabs--;
        println("}");
    }
    // Emit any trailing text (e.g. closing braces) the block left behind.
    if (howToFinish.postscript != null) {
        println(howToFinish.postscript);
    }
}
/** Generate the initaction for a block, which may be a RuleBlock or a
 * plain AlternativeBLock.
 * @blk The block for which the preamble is to be generated.
 */
protected void genBlockInitAction( AlternativeBlock blk )
{
    // Nothing to dump when the block carries no init action.
    if ( blk.initAction == null ) {
        return;
    }
    // bracket the user action with #line info when enabled
    genLineNo( blk );
    // translate $/# special symbols before emitting the action text
    String translated =
        processActionForSpecialSymbols( blk.initAction, blk.line, currentRule, null );
    printAction( translated );
    genLineNo2();
}
/** Generate the header for a block, which may be a RuleBlock or a
 * plain AlternativeBlock. This generates any variable declarations
 * and syntactic-predicate-testing variables.
 * @blk The block for which the preamble is to be generated.
 */
protected void genBlockPreamble(AlternativeBlock blk) {
    // define labels for rule blocks.
    if (blk instanceof RuleBlock ) {
        RuleBlock rblk = (RuleBlock)blk;
        if (rblk.labeledElements!=null ) {
            // one declaration per labeled element of the rule
            for (int i=0; i<rblk.labeledElements.size(); i++) {
                AlternativeElement a = (AlternativeElement)rblk.labeledElements.elementAt(i);
                //System.out.println("looking at labeled element: "+a);
                // Variables for labeled rule refs and subrules are different than
                // variables for grammar atoms. This test is a little tricky because
                // we want to get all rule refs and ebnf, but not rule blocks or
                // syntactic predicates
                if (a instanceof RuleRefElement ||
                    a instanceof AlternativeBlock &&
                    !(a instanceof RuleBlock) &&
                    !(a instanceof SynPredBlock) )
                {
                    if ( !(a instanceof RuleRefElement) &&
                        ((AlternativeBlock)a).not &&
                        analyzer.subruleCanBeInverted(((AlternativeBlock)a),
                                                      grammar instanceof LexerGrammar)
                    ) {
                        // Special case for inverted subrules that will be
                        // inlined. Treat these like token or char literal
                        // references
                        println(labeledElementType + " " + a.getLabel() + " = " +
                                labeledElementInit + ";");
                        if (grammar.buildAST) {
                            genASTDeclaration( a );
                        }
                    }
                    else
                    {
                        if (grammar.buildAST)
                        {
                            // Always gen AST variables for labeled elements,
                            // even if the element itself is marked with !
                            genASTDeclaration( a );
                        }
                        if (grammar instanceof LexerGrammar )
                            // labeled lexer rule refs yield a RefToken
                            println(namespaceAntlr+"RefToken "+a.getLabel()+";");
                        if (grammar instanceof TreeWalkerGrammar) {
                            // always generate rule-ref variables for tree walker
                            println(labeledElementType + " " + a.getLabel() + " = " +
                                    labeledElementInit + ";");
                        }
                    }
                }
                else
                {
                    // It is a token or literal reference. Generate the
                    // correct variable type for this grammar
                    println(labeledElementType + " " + a.getLabel() + " = " +
                            labeledElementInit + ";");
                    // In addition, generate *_AST variables if building ASTs
                    if (grammar.buildAST)
                    {
                        // a per-atom AST node type override (heterogeneous
                        // trees) changes the declared Ref type
                        if (a instanceof GrammarAtom &&
                            ((GrammarAtom)a).getASTNodeType() != null )
                        {
                            GrammarAtom ga = (GrammarAtom)a;
                            genASTDeclaration( a, "Ref"+ga.getASTNodeType() );
                        }
                        else
                        {
                            genASTDeclaration( a );
                        }
                    }
                }
            }
        }
    }
}
/** Generate MyLexer.cpp: includes, the three standard lexer constructors
 * (istream, InputBuffer, LexerSharedInputState), the initLiterals() literal
 * table, the optional debug rule-name array, nextToken(), every lexer rule,
 * and the bitsets the rules reference.
 * @param g the lexer grammar being generated (same object as the global
 *          {@code grammar} field — see the FIXME on genBody(ParserGrammar))
 * @throws IOException if the .cpp output file cannot be opened
 */
public void genBody(LexerGrammar g) throws IOException
{
    outputFile = grammar.getClassName() + ".cpp";
    outputLine = 1;
    currentOutput = antlrTool.openOutputFile(outputFile);
    //SAS: changed for proper text file io

    genAST = false;	// no way to gen trees.
    saveText = true;	// save consumed characters.

    tabs=0;

    // Generate header common to all C++ output files
    genHeader(outputFile);
    printHeaderAction(preIncludeCpp);

    // Generate header specific to lexer C++ file
    println("#include \"" + grammar.getClassName() + ".hpp\"");
    println("#include <antlr/CharBuffer.hpp>");
    println("#include <antlr/TokenStreamException.hpp>");
    println("#include <antlr/TokenStreamIOException.hpp>");
    println("#include <antlr/TokenStreamRecognitionException.hpp>");
    println("#include <antlr/CharStreamException.hpp>");
    println("#include <antlr/CharStreamIOException.hpp>");
    println("#include <antlr/NoViableAltForCharException.hpp>");
    if (grammar.debuggingOutput)
        println("#include <antlr/DebuggingInputBuffer.hpp>");
    println("");
    printHeaderAction(postIncludeCpp);

    // open user namespace, if any
    if (nameSpace != null)
        nameSpace.emitDeclarations(currentOutput);

    // Generate user-defined lexer file preamble
    printAction(grammar.preambleAction);

    // Generate lexer class definition: resolve the superclass name, stripping
    // any package qualifier and prefixing the ANTLR namespace for the default.
    String sup=null;
    if (grammar.superClass!=null ) {
        sup = grammar.superClass;
    }
    else {
        sup = grammar.getSuperClass();
        if (sup.lastIndexOf('.') != -1)
            sup = sup.substring(sup.lastIndexOf('.')+1);
        sup = namespaceAntlr + sup;
    }

    // 'noConstructor' option: still emit the constructors but inside #if 0
    if( noConstructors )
    {
        println("#if 0");
        println("// constructor creation turned of with 'noConstructor' option");
    }
    //
    // Generate the constructor from InputStream
    //
    println(grammar.getClassName() + "::" + grammar.getClassName() + "(" +
            namespaceStd + "istream& in)");
    tabs++;
    // if debugging, wrap the input buffer in a debugger
    if (grammar.debuggingOutput)
        println(": " + sup + "(new "+namespaceAntlr+"DebuggingInputBuffer(new "+
                namespaceAntlr+"CharBuffer(in)),"+g.caseSensitive+")");
    else
        println(": " + sup + "(new "+namespaceAntlr+"CharBuffer(in),"+
                g.caseSensitive+")");
    tabs--;
    println("{");
    tabs++;
    // if debugging, set up array variables and call user-overridable
    // debugging setup method
    if ( grammar.debuggingOutput ) {
        println("setRuleNames(_ruleNames);");
        println("setSemPredNames(_semPredNames);");
        println("setupDebugging();");
    }
    //	println("setCaseSensitive("+g.caseSensitive+");");
    println("initLiterals();");
    tabs--;
    println("}");
    println("");

    // Generate the constructor from InputBuffer
    println(grammar.getClassName() + "::" + grammar.getClassName() + "("+
            namespaceAntlr+"InputBuffer& ib)");
    tabs++;
    // if debugging, wrap the input buffer in a debugger
    if (grammar.debuggingOutput)
        println(": " + sup + "(new "+namespaceAntlr+"DebuggingInputBuffer(ib),"+
                g.caseSensitive+")");
    else
        println(": " + sup + "(ib,"+g.caseSensitive+")");
    tabs--;
    println("{");
    tabs++;
    // if debugging, set up array variables and call user-overridable
    // debugging setup method
    if ( grammar.debuggingOutput ) {
        println("setRuleNames(_ruleNames);");
        println("setSemPredNames(_semPredNames);");
        println("setupDebugging();");
    }
    //	println("setCaseSensitive("+g.caseSensitive+");");
    println("initLiterals();");
    tabs--;
    println("}");
    println("");

    // Generate the constructor from LexerSharedInputState
    println(grammar.getClassName() + "::" + grammar.getClassName() + "(const "+
            namespaceAntlr+"LexerSharedInputState& state)");
    tabs++;
    println(": " + sup + "(state,"+g.caseSensitive+")");
    tabs--;
    println("{");
    tabs++;
    // if debugging, set up array variables and call user-overridable
    // debugging setup method
    if ( grammar.debuggingOutput ) {
        println("setRuleNames(_ruleNames);");
        println("setSemPredNames(_semPredNames);");
        println("setupDebugging();");
    }
    //	println("setCaseSensitive("+g.caseSensitive+");");
    println("initLiterals();");
    tabs--;
    println("}");
    println("");

    if( noConstructors )
    {
        println("// constructor creation turned of with 'noConstructor' option");
        println("#endif");
    }

    println("void " + grammar.getClassName() + "::initLiterals()");
    println("{");
    tabs++;
    // Generate the initialization of the map
    // containing the string literals used in the lexer
    // The literals variable itself is in CharScanner
    Enumeration keys = grammar.tokenManager.getTokenSymbolKeys();
    while ( keys.hasMoreElements() ) {
        String key = (String)keys.nextElement();
        // only string-literal symbols (keys quoted with ") go in the table
        if ( key.charAt(0) != '"' ) {
            continue;
        }
        TokenSymbol sym = grammar.tokenManager.getTokenSymbol(key);
        if ( sym instanceof StringLiteralSymbol ) {
            StringLiteralSymbol s = (StringLiteralSymbol)sym;
            println("literals["+s.getId()+"] = "+s.getTokenType()+";");
        }
    }
    // Generate the setting of various generated options.
    tabs--;
    println("}");

    Enumeration ids;
    // generate the rule name array for debugging
    if (grammar.debuggingOutput) {
        println("const char* "+grammar.getClassName()+"::_ruleNames[] = {");
        tabs++;
        ids = grammar.rules.elements();
        int ruleNum=0;  // NOTE(review): never incremented/used in this loop
        while ( ids.hasMoreElements() ) {
            GrammarSymbol sym = (GrammarSymbol)ids.nextElement();
            if (sym instanceof RuleSymbol)
                println("\""+((RuleSymbol)sym).getId()+"\",");
        }
        println("0");
        tabs--;
        println("};");
    }

    // Generate nextToken() rule.
    // nextToken() is a synthetic lexer rule that is the implicit OR of all
    // user-defined lexer rules.
    genNextToken();

    // Generate code for each rule in the lexer
    ids = grammar.rules.elements();
    int ruleNum=0;
    while ( ids.hasMoreElements() ) {
        RuleSymbol sym = (RuleSymbol)ids.nextElement();
        // Don't generate the synthetic rules
        if (!sym.getId().equals("mnextToken")) {
            genRule(sym, false, ruleNum++, grammar.getClassName() + "::");
        }
        exitIfError();
    }

    // Generate the semantic predicate map for debugging
    if (grammar.debuggingOutput)
        genSemPredMap(grammar.getClassName() + "::");

    // Generate the bitsets used throughout the lexer
    genBitsets(bitsetsUsed, ((LexerGrammar)grammar).charVocabulary.size(),
               grammar.getClassName() + "::" );

    println("");
    // close user namespace, if any
    if (nameSpace != null)
        nameSpace.emitClosures(currentOutput);

    // Close the lexer output stream
    currentOutput.close();
    currentOutput = null;
}
/** Generate the initializeASTFactory() method for the recognizer: it
 * registers, per token type, the custom AST node factory to use, and sets
 * the maximum node type. When the grammar does not build ASTs an empty
 * method (with an unnamed parameter to avoid unused warnings) is emitted.
 * @param g the grammar whose class gets the method
 */
public void genInitFactory( Grammar g )
{
    // Generate the method to initialize an ASTFactory when we're
    // building AST's
    String param_name = "factory ";
    if( !g.buildAST )
        param_name = "";	// no AST build: parameter left unnamed (unused)
    println("void "+g.getClassName() + "::initializeASTFactory( "+
            namespaceAntlr+"ASTFactory& "+param_name+")");
    println("{");
    tabs++;
    if( g.buildAST )
    {
        // sort out custom AST types... synchronize token manager with token
        // specs on rules (and other stuff we were able to see from
        // action.g) (imperfect of course)
        TokenManager tm = grammar.tokenManager;
        Enumeration tokens = tm.getTokenSymbolKeys();
        while( tokens.hasMoreElements() )
        {
            String tok = (String)tokens.nextElement();
            TokenSymbol ts = tm.getTokenSymbol(tok);
            // if we have a custom type and there's not a more local override
            // of the tokentype then mark this as the type for the tokentype
            if( ts.getASTNodeType() != null )
            {
                // ensure capacity with this pseudo vector...
                astTypes.ensureCapacity(ts.getTokenType());
                String type = (String)astTypes.elementAt(ts.getTokenType());
                if( type == null )
                    // first (and only) registration wins
                    astTypes.setElementAt(ts.getASTNodeType(), ts.getTokenType());
                else
                {
                    // give a warning over action taken if the types are unequal
                    if( !ts.getASTNodeType().equals(type) )
                    {
                        antlrTool.warning("Token "+tok+" taking most specific AST type",
                                          grammar.getFilename(),1,1);
                        antlrTool.warning(" using "+type+" ignoring "+ts.getASTNodeType(),
                                          grammar.getFilename(),1,1);
                    }
                }
            }
        }
        // now actually write out all the registered types. (except the default
        // type.
        for( int i = 0; i < astTypes.size(); i++ )
        {
            String type = (String)astTypes.elementAt(i);
            if( type != null )
            {
                // index i is the token type the factory is registered for
                println("factory.registerFactory("+i+", \""+type+"\", "+
                        type+"::factory);");
            }
        }
        println("factory.setMaxNodeType("+grammar.tokenManager.maxTokenType()+");");
    }
    tabs--;
    println("}");
}
// FIXME: and so why are we passing here a g param while inside
// we merrily use the global grammar.
/** Generate MyParser.cpp: includes, the five standard parser constructors
 * (TokenBuffer/TokenStream with and without explicit k, and
 * ParserSharedInputState), the optional debug rule-name array, every rule,
 * the ASTFactory initializer, the token-name table and the bitsets.
 * @param g the parser grammar (see FIXME above — the global {@code grammar}
 *          field is what is actually used throughout)
 * @throws IOException if the .cpp output file cannot be opened
 */
public void genBody(ParserGrammar g) throws IOException
{
    // Open the output stream for the parser and set the currentOutput
    outputFile = grammar.getClassName() + ".cpp";
    outputLine = 1;
    currentOutput = antlrTool.openOutputFile(outputFile);

    genAST = grammar.buildAST;

    tabs = 0;

    // Generate the header common to all output files.
    genHeader(outputFile);
    printHeaderAction(preIncludeCpp);

    // Generate header for the parser
    println("#include \"" + grammar.getClassName() + ".hpp\"");
    println("#include <antlr/NoViableAltException.hpp>");
    println("#include <antlr/SemanticException.hpp>");
    println("#include <antlr/ASTFactory.hpp>");

    printHeaderAction(postIncludeCpp);

    // open user namespace, if any
    if (nameSpace != null)
        nameSpace.emitDeclarations(currentOutput);

    // Output the user-defined parser preamble
    printAction(grammar.preambleAction);

    // Resolve the superclass name: strip package qualifier, default gets the
    // ANTLR namespace prefix.
    String sup=null;
    if (grammar.superClass!=null )
        sup = grammar.superClass;
    else {
        sup = grammar.getSuperClass();
        if (sup.lastIndexOf('.') != -1)
            sup = sup.substring(sup.lastIndexOf('.')+1);
        sup = namespaceAntlr + sup;
    }

    // set up an array of all the rule names so the debugger can
    // keep track of them only by number -- less to store in tree...
    if (grammar.debuggingOutput) {
        println("const char* "+grammar.getClassName()+"::_ruleNames[] = {");
        tabs++;
        Enumeration ids = grammar.rules.elements();
        int ruleNum=0;  // NOTE(review): never incremented/used in this loop
        while ( ids.hasMoreElements() ) {
            GrammarSymbol sym = (GrammarSymbol)ids.nextElement();
            if (sym instanceof RuleSymbol)
                println("\""+((RuleSymbol)sym).getId()+"\",");
        }
        println("0");
        tabs--;
        println("};");
    }

    // Generate _initialize function
    // disabled since it isn't used anymore..
    //	println("void " + grammar.getClassName() + "::_initialize(void)");
    //	println("{");
    //	tabs++;
    // if debugging, set up arrays and call the user-overridable
    //	debugging setup method
    //	if ( grammar.debuggingOutput ) {
    //		println("setRuleNames(_ruleNames);");
    //		println("setSemPredNames(_semPredNames);");
    //		println("setupDebugging();");
    //	}
    //	tabs--;
    //	println("}");

    // 'noConstructor' option: still emit the constructors but inside #if 0
    if( noConstructors )
    {
        println("#if 0");
        println("// constructor creation turned of with 'noConstructor' option");
    }

    // Generate parser class constructor from TokenBuffer
    print(grammar.getClassName() + "::" + grammar.getClassName());
    println("("+namespaceAntlr+"TokenBuffer& tokenBuf, int k)");
    println(": " + sup + "(tokenBuf,k)");
    println("{");
    //	tabs++;
    //	println("_initialize();");
    //	tabs--;
    println("}");
    println("");

    // TokenBuffer constructor with the grammar's own lookahead depth
    print(grammar.getClassName() + "::" + grammar.getClassName());
    println("("+namespaceAntlr+"TokenBuffer& tokenBuf)");
    println(": " + sup + "(tokenBuf," + grammar.maxk + ")");
    println("{");
    //	tabs++;
    //	println("_initialize();");
    //	tabs--;
    println("}");
    println("");

    // Generate parser class constructor from TokenStream
    print(grammar.getClassName() + "::" + grammar.getClassName());
    println("("+namespaceAntlr+"TokenStream& lexer, int k)");
    println(": " + sup + "(lexer,k)");
    println("{");
    //	tabs++;
    //	println("_initialize();");
    //	tabs--;
    println("}");
    println("");

    // TokenStream constructor with the grammar's own lookahead depth
    print(grammar.getClassName() + "::" + grammar.getClassName());
    println("("+namespaceAntlr+"TokenStream& lexer)");
    println(": " + sup + "(lexer," + grammar.maxk + ")");
    println("{");
    //	tabs++;
    //	println("_initialize();");
    //	tabs--;
    println("}");
    println("");

    // constructor from a shared input state
    print(grammar.getClassName() + "::" + grammar.getClassName());
    println("(const "+namespaceAntlr+"ParserSharedInputState& state)");
    println(": " + sup + "(state," + grammar.maxk + ")");
    println("{");
    //	tabs++;
    //	println("_initialize();");
    //	tabs--;
    println("}");
    println("");

    if( noConstructors )
    {
        println("// constructor creation turned of with 'noConstructor' option");
        println("#endif");
    }

    astTypes = new Vector();

    // Generate code for each rule in the grammar
    Enumeration ids = grammar.rules.elements();
    int ruleNum=0;
    while ( ids.hasMoreElements() ) {
        GrammarSymbol sym = (GrammarSymbol)ids.nextElement();
        if (sym instanceof RuleSymbol) {
            RuleSymbol rs = (RuleSymbol)sym;
            // second arg: suppress "unused rule" handling when never referenced
            genRule(rs, rs.references.size()==0, ruleNum++,
                    grammar.getClassName() + "::");
        }
        exitIfError();
    }

    genInitFactory( g );

    // Generate the token names
    genTokenStrings(grammar.getClassName() + "::");

    // Generate the bitsets used throughout the grammar
    genBitsets(bitsetsUsed, grammar.tokenManager.maxTokenType(),
               grammar.getClassName() + "::" );

    // Generate the semantic predicate map for debugging
    if (grammar.debuggingOutput)
        genSemPredMap(grammar.getClassName() + "::");

    // Close class definition
    println("");
    println("");
    // close user namespace, if any
    if (nameSpace != null)
        nameSpace.emitClosures(currentOutput);

    // Close the parser output stream
    currentOutput.close();
    currentOutput = null;
}
/** Generate MyTreeParser.cpp: includes, the default constructor, every
 * tree-walker rule, the ASTFactory initializer, the token-name table and
 * the bitsets.
 * @param g the tree-walker grammar (the global {@code grammar} field is
 *          what is actually used throughout)
 * @throws IOException if the .cpp output file cannot be opened
 */
public void genBody(TreeWalkerGrammar g) throws IOException
{
    // Open the output stream for the parser and set the currentOutput
    outputFile = grammar.getClassName() + ".cpp";
    outputLine = 1;
    currentOutput = antlrTool.openOutputFile(outputFile);
    //SAS: changed for proper text file io

    genAST = grammar.buildAST;
    tabs = 0;

    // Generate the header common to all output files.
    genHeader(outputFile);
    printHeaderAction(preIncludeCpp);

    // Generate header for the parser
    println("#include \"" + grammar.getClassName() + ".hpp\"");
    println("#include <antlr/Token.hpp>");
    println("#include <antlr/AST.hpp>");
    println("#include <antlr/NoViableAltException.hpp>");
    println("#include <antlr/MismatchedTokenException.hpp>");
    println("#include <antlr/SemanticException.hpp>");
    println("#include <antlr/BitSet.hpp>");

    printHeaderAction(postIncludeCpp);

    // open user namespace, if any
    if (nameSpace != null)
        nameSpace.emitDeclarations(currentOutput);

    // Output the user-defined parser premamble
    printAction(grammar.preambleAction);

    // Generate parser class definition: resolve superclass name, stripping
    // any package qualifier and prefixing the ANTLR namespace for the default.
    String sup = null;
    if (grammar.superClass!=null ) {
        sup = grammar.superClass;
    }
    else {
        sup = grammar.getSuperClass();
        if (sup.lastIndexOf('.') != -1)
            sup = sup.substring(sup.lastIndexOf('.')+1);
        sup = namespaceAntlr + sup;
    }

    // 'noConstructor' option: still emit the constructor but inside #if 0
    if( noConstructors )
    {
        println("#if 0");
        println("// constructor creation turned of with 'noConstructor' option");
    }
    // Generate default parser class constructor
    println(grammar.getClassName() + "::" + grammar.getClassName() + "()");
    println("\t: "+namespaceAntlr+"TreeParser() {");
    tabs++;
    //	println("setTokenNames(_tokenNames);");
    tabs--;
    println("}");
    if( noConstructors )
    {
        println("// constructor creation turned of with 'noConstructor' option");
        println("#endif");
    }
    println("");

    astTypes = new Vector();

    // Generate code for each rule in the grammar
    Enumeration ids = grammar.rules.elements();
    int ruleNum=0;
    String ruleNameInits = "";  // NOTE(review): never used after this point
    while ( ids.hasMoreElements() ) {
        GrammarSymbol sym = (GrammarSymbol)ids.nextElement();
        if (sym instanceof RuleSymbol) {
            RuleSymbol rs = (RuleSymbol)sym;
            // second arg: suppress "unused rule" handling when never referenced
            genRule(rs, rs.references.size()==0, ruleNum++,
                    grammar.getClassName() + "::");
        }
        exitIfError();
    }

    // Generate the ASTFactory initialization function
    genInitFactory( grammar );

    // Generate the token names
    genTokenStrings(grammar.getClassName() + "::");

    // Generate the bitsets used throughout the grammar
    genBitsets(bitsetsUsed, grammar.tokenManager.maxTokenType(),
               grammar.getClassName() + "::" );

    // Close class definition
    println("");
    println("");

    // close user namespace, if any
    if (nameSpace != null)
        nameSpace.emitClosures(currentOutput);

    // Close the parser output stream
    currentOutput.close();
    currentOutput = null;
}
/** Generate a series of case statements that implement a BitSet test.
 * Cases are emitted `wrap` per output line; a line that is still open
 * after the loop is terminated with a final newline.
 * @param p The Bitset for which cases are to be generated
 */
protected void genCases(BitSet p) {
    if (DEBUG_CODE_GENERATOR || DEBUG_CPP_CODE_GENERATOR )
        System.out.println("genCases("+p+")");
    int[] members = p.toArray();
    // Wrap cases four-per-line for lexer, one-per-line for parser
    final int wrap = 1; //(grammar instanceof LexerGrammar) ? 4 : 1;
    int col = 1;
    boolean atLineStart = true;
    for (int idx = 0; idx < members.length; idx++) {
        // first case on a line gets the indent; subsequent ones a separator
        if (col == 1) {
            print("");
        }
        else {
            _print(" ");
        }
        _print("case " + getValueString(members[idx]) + ":");
        if (col == wrap) {
            // line full: terminate it and start over
            _println("");
            atLineStart = true;
            col = 1;
        }
        else {
            col++;
            atLineStart = false;
        }
    }
    // close a partially filled final line
    if (!atLineStart) {
        _println("");
    }
}
/** Generate common code for a block of alternatives; return a postscript
 * that needs to be generated at the end of the block. Other routines
 * may append else-clauses and such for error checking before the postfix
 * is generated.
 * If the grammar is a lexer, then generate alternatives in an order where
 * alternatives requiring deeper lookahead are generated first, and
 * EOF in the lookahead set reduces the depth of the lookahead.
 * @param blk The block to generate
 * @param noTestForSingle If true, then it does not generate a test for a single alternative.
 */
public CppBlockFinishingInfo genCommonBlock(AlternativeBlock blk,
                                            boolean noTestForSingle )
{
    int nIF=0;                          // number of if/else-if clauses emitted
    boolean createdLL1Switch = false;   // did we open a switch statement?
    int closingBracesOfIFSequence = 0;  // extra "}"s owed by nested synpreds
    CppBlockFinishingInfo finishingInfo = new CppBlockFinishingInfo();
    if (DEBUG_CODE_GENERATOR || DEBUG_CPP_CODE_GENERATOR )
        System.out.println("genCommonBlk("+blk+")");

    // Save the AST generation state, and set it to that of the block
    boolean savegenAST = genAST;
    genAST = genAST && blk.getAutoGen();

    boolean oldsaveTest = saveText;
    saveText = saveText && blk.getAutoGen();

    // Is this block inverted? If so, generate special-case code
    if (blk.not &&
        analyzer.subruleCanBeInverted(blk, grammar instanceof LexerGrammar) )
    {
        Lookahead p = analyzer.look(1, blk);
        // Variable assignment for labeled elements
        if (blk.getLabel() != null && syntacticPredLevel == 0) {
            println(blk.getLabel() + " = " + lt1Value + ";");
        }
        // AST
        genElementAST(blk);
        String astArgs="";
        if (grammar instanceof TreeWalkerGrammar) {
            // tree walkers pass the tree cursor as the first match() argument
            if( usingCustomAST )
                astArgs=namespaceAntlr+"RefAST"+"(_t),";
            else
                astArgs="_t,";
        }
        // match the bitset for the alternative
        println("match(" + astArgs +
                getBitsetName(markBitsetForGen(p.fset)) + ");");
        // tack on tree cursor motion if doing a tree walker
        if (grammar instanceof TreeWalkerGrammar)
        {
            println("_t = _t->getNextSibling();");
        }
        return finishingInfo;
    }

    // Special handling for single alt
    if (blk.getAlternatives().size() == 1)
    {
        Alternative alt = blk.getAlternativeAt(0);
        // Generate a warning if there is a synPred for single alt.
        if (alt.synPred != null)
        {
            antlrTool.warning(
                "Syntactic predicate superfluous for single alternative",
                grammar.getFilename(),
                blk.getAlternativeAt(0).synPred.getLine(),
                blk.getAlternativeAt(0).synPred.getColumn()
            );
        }
        if (noTestForSingle)
        {
            if (alt.semPred != null)
            {
                // Generate validating predicate
                genSemPred(alt.semPred, blk.line);
            }
            genAlt(alt, blk);
            return finishingInfo;
        }
    }

    // count number of simple LL(1) cases; only do switch for
    // many LL(1) cases (no preds, no end of token refs)
    // We don't care about exit paths for (...)*, (...)+
    // because we don't explicitly have a test for them
    // as an alt in the loop.
    //
    // Also, we now count how many unicode lookahead sets
    // there are--they must be moved to DEFAULT or ELSE
    // clause.
    int nLL1 = 0;
    for (int i=0; i<blk.getAlternatives().size(); i++)
    {
        Alternative a = blk.getAlternativeAt(i);
        if (suitableForCaseExpression(a) )
            nLL1++;
    }

    // do LL(1) cases
    if (nLL1 >= makeSwitchThreshold )
    {
        // Determine the name of the item to be compared
        String testExpr = lookaheadString(1);
        createdLL1Switch = true;
        // when parsing trees, convert null to valid tree node with NULL lookahead
        if (grammar instanceof TreeWalkerGrammar )
        {
            println("if (_t == "+labeledElementASTInit+" )");
            tabs++;
            println("_t = ASTNULL;");
            tabs--;
        }
        println("switch ( "+testExpr+") {");
        for (int i=0; i<blk.alternatives.size(); i++)
        {
            Alternative alt = blk.getAlternativeAt(i);
            // ignore any non-LL(1) alts, predicated alts or end-of-token alts
            // or end-of-token alts for case expressions
            if ( !suitableForCaseExpression(alt) )
            {
                continue;
            }
            Lookahead p = alt.cache[1];
            if (p.fset.degree() == 0 && !p.containsEpsilon())
            {
                antlrTool.warning("Alternate omitted due to empty prediction set",
                    grammar.getFilename(),
                    alt.head.getLine(), alt.head.getColumn());
            }
            else
            {
                // case labels for the alt's k=1 prediction set, then the alt body
                genCases(p.fset);
                println("{");
                tabs++;
                genAlt(alt, blk);
                println("break;");
                tabs--;
                println("}");
            }
        }
        // remaining (non-LL(1)) alts go under default:; the switch is closed
        // via finishingInfo.postscript below
        println("default:");
        tabs++;
    }

    // do non-LL(1) and nondeterministic cases
    // This is tricky in the lexer, because of cases like:
    //     STAR : '*' ;
    //     ASSIGN_STAR : "*=";
    // Since nextToken is generated without a loop, then the STAR will
    // have end-of-token as it's lookahead set for LA(2). So, we must generate the
    // alternatives containing trailing end-of-token in their lookahead sets *after*
    // the alternatives without end-of-token. This implements the usual
    // lexer convention that longer matches come before shorter ones, e.g.
    // "*=" matches ASSIGN_STAR not STAR
    //
    // For non-lexer grammars, this does not sort the alternates by depth
    // Note that alts whose lookahead is purely end-of-token at k=1 end up
    // as default or else clauses.
    int startDepth = (grammar instanceof LexerGrammar) ? grammar.maxk : 0;
    for (int altDepth = startDepth; altDepth >= 0; altDepth--) {
        if (DEBUG_CODE_GENERATOR || DEBUG_CPP_CODE_GENERATOR )
            System.out.println("checking depth "+altDepth);
        for (int i=0; i<blk.alternatives.size(); i++) {
            Alternative alt = blk.getAlternativeAt(i);
            if (DEBUG_CODE_GENERATOR || DEBUG_CPP_CODE_GENERATOR )
                System.out.println("genAlt: "+i);
            // if we made a switch above, ignore what we already took care
            // of. Specifically, LL(1) alts with no preds
            // that do not have end-of-token in their prediction set
            if (createdLL1Switch && suitableForCaseExpression(alt) )
            {
                if (DEBUG_CODE_GENERATOR || DEBUG_CPP_CODE_GENERATOR )
                    System.out.println("ignoring alt because it was in the switch");
                continue;
            }
            String e;           // the lookahead test expression for this alt
            boolean unpredicted = false;

            if (grammar instanceof LexerGrammar) {
                // Calculate the "effective depth" of the alt, which is the max
                // depth at which cache[depth]!=end-of-token
                int effectiveDepth = alt.lookaheadDepth;
                if (effectiveDepth == GrammarAnalyzer.NONDETERMINISTIC)
                {
                    // use maximum lookahead
                    effectiveDepth = grammar.maxk;
                }
                while (effectiveDepth >= 1 &&
                       alt.cache[effectiveDepth].containsEpsilon() )
                {
                    effectiveDepth--;
                }
                // Ignore alts whose effective depth is other than the ones we
                // are generating for this iteration.
                if (effectiveDepth != altDepth)
                {
                    if (DEBUG_CODE_GENERATOR || DEBUG_CPP_CODE_GENERATOR )
                        System.out.println("ignoring alt because effectiveDepth!=altDepth;"+
                                           effectiveDepth+"!="+altDepth);
                    continue;
                }
                unpredicted = lookaheadIsEmpty(alt, effectiveDepth);
                e = getLookaheadTestExpression(alt, effectiveDepth);
            }
            else
            {
                unpredicted = lookaheadIsEmpty(alt, grammar.maxk);
                e = getLookaheadTestExpression(alt, grammar.maxk);
            }

            // Was it a big unicode range that forced unsuitability
            // for a case expression?
            if (alt.cache[1].fset.degree() > caseSizeThreshold &&
                suitableForCaseExpression(alt))
            {
                if (nIF==0 )
                {
                    // generate this only for the first if the elseif's
                    // are covered by this one
                    if (grammar instanceof TreeWalkerGrammar ) {
                        println("if (_t == "+labeledElementASTInit+" )");
                        tabs++;
                        println("_t = ASTNULL;");
                        tabs--;
                    }
                    println("if " + e + " {");
                }
                else
                    println("else if " + e + " {");
            }
            else if (unpredicted && alt.semPred==null && alt.synPred==null)
            {
                // The alt has empty prediction set and no
                // predicate to help out. if we have not
                // generated a previous if, just put {...} around
                // the end-of-token clause
                if (nIF==0 ) {
                    println("{");
                }
                else {
                    println("else {");
                }
                finishingInfo.needAnErrorClause = false;
            }
            else
            {
                // check for sem and syn preds
                // Add any semantic predicate expression to the lookahead test
                if (alt.semPred != null ) {
                    // if debugging, wrap the evaluation of the predicate in a method
                    //
                    // translate $ and # references
                    ActionTransInfo tInfo = new ActionTransInfo();
                    String actionStr = processActionForSpecialSymbols(alt.semPred,
                                                                      blk.line,
                                                                      currentRule,
                                                                      tInfo);
                    // ignore translation info...we don't need to do anything with it.
                    // call that will inform SemanticPredicateListeners of the
                    // result
                    if (grammar.debuggingOutput &&
                        ((grammar instanceof ParserGrammar) ||
                         (grammar instanceof LexerGrammar))
                    )
                        e = "("+e+"&& fireSemanticPredicateEvaluated(antlr.debug.SemanticPredicateEvent.PREDICTING,"+ //FIXME
                            addSemPred(charFormatter.escapeString(actionStr))+","+
                            actionStr+"))";
                    else
                        e = "("+e+"&&("+actionStr +"))";
                }
                // Generate any syntactic predicates
                if (nIF>0 ) {
                    if (alt.synPred != null ) {
                        // synpred nested inside an else; owes an extra "}"
                        println("else {");
                        tabs++;
                        genSynPred(alt.synPred, e );
                        closingBracesOfIFSequence++;
                    }
                    else {
                        println("else if " + e + " {");
                    }
                }
                else {
                    if (alt.synPred != null ) {
                        genSynPred(alt.synPred, e );
                    }
                    else {
                        // when parsing trees, convert null to valid tree node
                        // with NULL lookahead.
                        if (grammar instanceof TreeWalkerGrammar ) {
                            println("if (_t == "+labeledElementASTInit+" )");
                            tabs++;
                            println("_t = ASTNULL;");
                            tabs--;
                        }
                        println("if " + e + " {");
                    }
                }
            }

            nIF++;
            tabs++;
            genAlt(alt, blk);
            tabs--;
            println("}");
        }
    }

    // Build the postscript: one "}" per brace still owed by nested synpreds.
    String ps = "";
    for (int i=1; i<=closingBracesOfIFSequence; i++) {
        tabs--; // does JavaCodeGenerator need this?
        ps+="}";
    }

    // Restore the AST generation state
    genAST = savegenAST;

    // restore save text state
    saveText=oldsaveTest;

    // Return the finishing info.
    if (createdLL1Switch ) {
        tabs--;
        finishingInfo.postscript = ps+"}";   // also closes the switch statement
        finishingInfo.generatedSwitch = true;
        finishingInfo.generatedAnIf = nIF>0;
        //return new CppBlockFinishingInfo(ps+"}",true,nIF>0); // close up switch statement
    }
    else {
        finishingInfo.postscript = ps;
        finishingInfo.generatedSwitch = false;
        finishingInfo.generatedAnIf = nIF>0;
        //return new CppBlockFinishingInfo(ps, false,nIF>0);
    }
    return finishingInfo;
}
/** Decide whether an alternative can be predicted with a plain switch-case
 *  label: it must be predictable with exactly one token of lookahead, carry
 *  no semantic predicate, have a non-epsilon first set, and that set must be
 *  small enough (<= caseSizeThreshold) to enumerate as case labels.
 */
private static boolean suitableForCaseExpression(Alternative a) {
    // Order matters: only touch a.cache[1] once we know depth is exactly 1.
    if (a.lookaheadDepth != 1 || a.semPred != null) {
        return false;
    }
    if (a.cache[1].containsEpsilon()) {
        return false;
    }
    return a.cache[1].fset.degree() <= caseSizeThreshold;
}
/** Generate code to link an element reference into the AST.
 *
 *  Emits (into the current C++ output) the declaration of the AST variable
 *  for {@code el}, its initialization from the current lookahead value, and
 *  the astFactory call that attaches it to the tree under construction
 *  (addASTChild for plain elements, makeASTRoot for '^' elements).
 *  Nothing is emitted while inside a syntactic predicate
 *  (syntacticPredLevel != 0) unless we are in a tree walker without buildAST,
 *  where only label bookkeeping is needed.
 *
 *  @param el the grammar element whose AST hookup code is to be generated
 */
private void genElementAST(AlternativeElement el) {
    // handle case where you're not building trees, but are in tree walker.
    // Just need to get labels set up.
    if ( grammar instanceof TreeWalkerGrammar && !grammar.buildAST )
    {
        String elementRef;
        String astName;
        // Generate names and declarations of the AST variable(s)
        if (el.getLabel() == null)
        {
            elementRef = lt1Value;
            // Generate AST variables for unlabeled stuff
            astName = "tmp" + astVarNumber + "_AST";
            astVarNumber++;
            // Map the generated AST variable in the alternate
            mapTreeVariable(el, astName);
            // Generate an "input" AST variable also
            println(labeledElementASTType+" "+astName+"_in = "+elementRef+";");
        }
        return;
    }
    if ( grammar.buildAST && syntacticPredLevel == 0 )
    {
        // Declaration is needed when ASTs are on and the element is either
        // labeled or not excluded with '!'.
        boolean needASTDecl =
            ( genAST && (el.getLabel() != null ||
                         el.getAutoGenType() != GrammarElement.AUTO_GEN_BANG ));
        // RK: if we have a grammar element always generate the decl
        // since some guy can access it from an action and we can't
        // peek ahead (well not without making a mess).
        // I'd prefer taking this out.
        if( el.getAutoGenType() != GrammarElement.AUTO_GEN_BANG &&
            (el instanceof TokenRefElement) )
            needASTDecl = true;
        // Wrap the AST construction in a guessing==0 test when syntactic
        // predicates exist, so speculative parses build no trees.
        boolean doNoGuessTest =
            ( grammar.hasSyntacticPredicate && needASTDecl );
        String elementRef;
        String astNameBase;
        // Generate names and declarations of the AST variable(s)
        if (el.getLabel() != null)
        {
            // if the element is labeled use that name...
            elementRef = el.getLabel();
            astNameBase = el.getLabel();
        }
        else
        {
            // else generate a temporary name...
            elementRef = lt1Value;
            // Generate AST variables for unlabeled stuff
            astNameBase = "tmp" + astVarNumber;
            astVarNumber++;
        }
        // Generate the declaration if required.
        if ( needASTDecl )
        {
            if ( el instanceof GrammarAtom )
            {
                GrammarAtom ga = (GrammarAtom)el;
                if ( ga.getASTNodeType()!=null )
                {
                    // Atom carries a custom AST node type -> Ref<Type> smart pointer.
                    genASTDeclaration( el, astNameBase, "Ref"+ga.getASTNodeType() );
                    // println("Ref"+ga.getASTNodeType()+" " + astName + ";");
                }
                else
                {
                    genASTDeclaration( el, astNameBase, labeledElementASTType );
                    // println(labeledElementASTType+" " + astName + " = "+labeledElementASTInit+";");
                }
            }
            else
            {
                genASTDeclaration( el, astNameBase, labeledElementASTType );
                // println(labeledElementASTType+" " + astName + " = "+labeledElementASTInit+";");
            }
        }
        // for convenience..
        String astName = astNameBase + "_AST";
        // Map the generated AST variable in the alternate
        mapTreeVariable(el, astName);
        if (grammar instanceof TreeWalkerGrammar)
        {
            // Generate an "input" AST variable also
            println(labeledElementASTType+" " + astName + "_in = "+labeledElementASTInit+";");
        }
        // Enclose actions with !guessing
        if (doNoGuessTest) {
            println("if ( inputState->guessing == 0 ) {");
            tabs++;
        }
        // if something has a label assume it will be used
        // so we must initialize the RefAST
        if (el.getLabel() != null)
        {
            if ( el instanceof GrammarAtom )
            {
                println(astName + " = "+ getASTCreateString((GrammarAtom)el, elementRef) + ";");
            }
            else
            {
                println(astName + " = "+ getASTCreateString(elementRef) + ";");
            }
        }
        // if it has no label but a declaration exists initialize it.
        if( el.getLabel() == null && needASTDecl )
        {
            elementRef = lt1Value;
            if ( el instanceof GrammarAtom )
            {
                println(astName + " = "+ getASTCreateString((GrammarAtom)el, elementRef) + ";");
            }
            else
            {
                println(astName + " = "+ getASTCreateString(elementRef) + ";");
            }
            // Map the generated AST variable in the alternate
            if (grammar instanceof TreeWalkerGrammar)
            {
                // set "input" AST variable also
                println(astName + "_in = " + elementRef + ";");
            }
        }
        if (genAST)
        {
            // Attach the node: custom AST types must be down-cast through RefAST.
            switch (el.getAutoGenType())
            {
            case GrammarElement.AUTO_GEN_NONE:
                if( usingCustomAST ||
                    (el instanceof GrammarAtom &&
                     ((GrammarAtom)el).getASTNodeType() != null) )
                    println("astFactory->addASTChild(currentAST, "+namespaceAntlr+"RefAST("+astName + "));");
                else
                    println("astFactory->addASTChild(currentAST, "+astName + ");");
                // println("astFactory.addASTChild(currentAST, "+namespaceAntlr+"RefAST(" + astName + "));");
                break;
            case GrammarElement.AUTO_GEN_CARET:
                // '^' element becomes the new root of the current subtree.
                if( usingCustomAST ||
                    (el instanceof GrammarAtom &&
                     ((GrammarAtom)el).getASTNodeType() != null) )
                    println("astFactory->makeASTRoot(currentAST, "+namespaceAntlr+"RefAST(" +astName + "));");
                else
                    println("astFactory->makeASTRoot(currentAST, " +astName + ");");
                break;
            default:
                break;
            }
        }
        if (doNoGuessTest)
        {
            tabs--;
            println("}");
        }
    }
}
/** Close the try block and generate catch phrases
 *  if the element has a labeled handler in the rule.
 */
private void genErrorCatchForElement(AlternativeElement el) {
    String label = el.getLabel();
    if (label == null) {
        return;
    }
    // Resolve the enclosing rule; lexer rule names are stored mangled.
    String ruleName = el.enclosingRuleName;
    if (grammar instanceof LexerGrammar) {
        ruleName = CodeGenerator.encodeLexerRuleName(el.enclosingRuleName);
    }
    RuleSymbol enclosing = (RuleSymbol) grammar.getSymbol(ruleName);
    if (enclosing == null) {
        antlrTool.panic("Enclosing rule not found!");
    }
    // Only emit a catch when the rule declares a handler for this label.
    ExceptionSpec spec = enclosing.block.findExceptionSpec(label);
    if (spec != null) {
        tabs--;
        println("}");
        genErrorHandler(spec);
    }
}
/** Generate the catch phrases for a user-specified error handler.
 *  One C++ catch clause is emitted per handler in the spec; when the
 *  grammar has syntactic predicates, the user action only runs outside
 *  guess mode and the exception is rethrown while guessing.
 */
private void genErrorHandler(ExceptionSpec ex) {
    // Each ExceptionHandler in the ExceptionSpec is a separate catch
    int handlerCount = ex.handlers.size();
    for (int idx = 0; idx < handlerCount; idx++) {
        ExceptionHandler h = (ExceptionHandler) ex.handlers.elementAt(idx);
        // Open the catch phrase with the user's "Type name" text verbatim.
        println("catch (" + h.exceptionTypeAndName.getText() + ") {");
        tabs++;
        boolean guarded = grammar.hasSyntacticPredicate;
        if (guarded) {
            println("if (inputState->guessing==0) {");
            tabs++;
        }
        // When not guessing, execute user handler action
        // (translate $/# references; translation info itself is unused here).
        ActionTransInfo transInfo = new ActionTransInfo();
        genLineNo(h.action);
        String translated = processActionForSpecialSymbols(
            h.action.getText(), h.action.getLine(), currentRule, transInfo);
        printAction(translated);
        genLineNo2();
        if (guarded) {
            tabs--;
            println("} else {");
            tabs++;
            // When guessing, rethrow exception
            println("throw;");
            tabs--;
            println("}");
        }
        // Close catch phrase
        tabs--;
        println("}");
    }
}
/** Generate a try { opening if the element has a labeled handler in the rule. */
private void genErrorTryForElement(AlternativeElement el) {
    String label = el.getLabel();
    if (label == null) {
        return;
    }
    // Resolve the enclosing rule; lexer rule names are stored mangled.
    String ruleName = el.enclosingRuleName;
    if (grammar instanceof LexerGrammar) {
        ruleName = CodeGenerator.encodeLexerRuleName(el.enclosingRuleName);
    }
    RuleSymbol enclosing = (RuleSymbol) grammar.getSymbol(ruleName);
    if (enclosing == null) {
        antlrTool.panic("Enclosing rule not found!");
    }
    // Open the try only when a handler exists for this label; the matching
    // catch is emitted later by genErrorCatchForElement.
    ExceptionSpec spec = enclosing.block.findExceptionSpec(label);
    if (spec != null) {
        println("try { // for error handling");
        tabs++;
    }
}
/** Generate a header that is common to all C++ files: the $ANTLR$ banner
 *  recording the tool version, source grammar file, and generated file name.
 */
protected void genHeader(String fileName) {
    String grammarName = antlrTool.fileMinusPath(antlrTool.grammarFile);
    String banner = "/* $ANTLR " + antlrTool.version + ": "
        + "\"" + grammarName + "\""
        + " -> "
        + "\"" + fileName + "\"$ */";
    println(banner);
}
// these are unique to C++ mode
/** Generate the lexer header file (MyLexer.hpp): include guard, standard
 *  ANTLR includes, the lexer class declaration with constructors, the
 *  synthesized nextToken() declaration, one declaration per user rule,
 *  and the debugging/bitset members.
 *
 *  Side effects: opens currentOutput, forces genAST=false / saveText=true,
 *  and closes currentOutput when done.
 *
 *  @param g the lexer grammar (used for caseSensitiveLiterals)
 *  @throws IOException if the output file cannot be opened/written
 */
public void genInclude(LexerGrammar g) throws IOException
{
    outputFile = grammar.getClassName() + ".hpp";
    outputLine = 1;
    currentOutput = antlrTool.openOutputFile(outputFile);
    //SAS: changed for proper text file io
    genAST = false;	// no way to gen trees.
    saveText = true;	// save consumed characters.
    tabs = 0;
    // Generate a guard wrapper
    println("#ifndef INC_"+grammar.getClassName()+"_hpp_");
    println("#define INC_"+grammar.getClassName()+"_hpp_");
    println("");
    printHeaderAction(preIncludeHpp);
    println("#include <antlr/config.hpp>");
    // Generate header common to all C++ output files
    genHeader(outputFile);
    // Generate header specific to lexer header file
    println("#include <antlr/CommonToken.hpp>");
    println("#include <antlr/InputBuffer.hpp>");
    println("#include <antlr/BitSet.hpp>");
    println("#include \"" + grammar.tokenManager.getName() + TokenTypesFileSuffix+".hpp\"");
    // Find the name of the super class
    String sup = null;
    if ( grammar.superClass != null ) {
        // User supplied a superclass: they must include its header themselves.
        sup = grammar.superClass;
        println("\n// Include correct superclass header with a header statement for example:");
        println("// header \"post_include_hpp\" {");
        println("// #include \""+sup+".hpp\"");
        println("// }");
        println("// Or....");
        println("// header {");
        println("// #include \""+sup+".hpp\"");
        println("// }\n");
    }
    else {
        // Default superclass: strip any package prefix, include its header,
        // then qualify it with the ANTLR namespace for the class declaration.
        sup = grammar.getSuperClass();
        if (sup.lastIndexOf('.') != -1)
            sup = sup.substring(sup.lastIndexOf('.')+1);
        println("#include <antlr/"+sup+".hpp>");
        sup = namespaceAntlr + sup;
    }
    // Do not use printAction because we assume tabs==0
    printHeaderAction(postIncludeHpp);
    if (nameSpace != null)
        nameSpace.emitDeclarations(currentOutput);
    printHeaderAction("");
    // print javadoc comment if any
    if ( grammar.comment != null ) {
        _println(grammar.comment);
    }
    // Generate lexer class definition
    print("class CUSTOM_API " + grammar.getClassName() + " : public " + sup);
    println(", public " + grammar.tokenManager.getName() + TokenTypesFileSuffix);
    Token tsuffix = (Token)grammar.options.get("classHeaderSuffix");
    if ( tsuffix != null ) {
        String suffix = StringUtils.stripFrontBack(tsuffix.getText(),"\"","\"");
        if ( suffix != null ) {
            print(", "+suffix); // must be an interface name for Java
        }
    }
    println("{");
    // Generate user-defined lexer class members
    if (grammar.classMemberAction != null) {
        genLineNo(grammar.classMemberAction);
        print(
            processActionForSpecialSymbols(grammar.classMemberAction.getText(),
                                           grammar.classMemberAction.getLine(),
                                           currentRule, null)
        );
        genLineNo2();
    }
    // Generate initLiterals() method
    tabs = 0;
    println("private:");
    tabs = 1;
    println("void initLiterals();");
    // Generate getCaseSensitiveLiterals() method
    tabs = 0;
    println("public:");
    tabs = 1;
    println("bool getCaseSensitiveLiterals() const");
    println("{");
    tabs++;
    println("return "+g.caseSensitiveLiterals + ";");
    tabs--;
    println("}");
    // Make constructors public
    tabs = 0;
    println("public:");
    tabs = 1;
    if( noConstructors )
    {
        // NOTE(review): "turned of" is a typo ("off") in the emitted comment;
        // left untouched here to keep generated output byte-identical.
        tabs = 0;
        println("#if 0");
        println("// constructor creation turned of with 'noConstructor' option");
        tabs = 1;
    }
    // Generate the constructor from std::istream
    println(grammar.getClassName() + "(" + namespaceStd + "istream& in);");
    // Generate the constructor from InputBuffer
    println(grammar.getClassName() + "("+namespaceAntlr+"InputBuffer& ib);");
    println(grammar.getClassName() + "(const "+namespaceAntlr+"LexerSharedInputState& state);");
    if( noConstructors )
    {
        tabs = 0;
        println("// constructor creation turned of with 'noConstructor' option");
        println("#endif");
        tabs = 1;
    }
    // Generate nextToken() rule.
    // nextToken() is a synthetic lexer rule that is the implicit OR of all
    // user-defined lexer rules.
    println(namespaceAntlr+"RefToken nextToken();");
    // Generate code for each rule in the lexer
    Enumeration ids = grammar.rules.elements();
    while ( ids.hasMoreElements() ) {
        RuleSymbol sym = (RuleSymbol)ids.nextElement();
        // Don't generate the synthetic rules
        if (!sym.getId().equals("mnextToken")) {
            genRuleHeader(sym, false);
        }
        exitIfError();
    }
    // Make the rest private
    tabs = 0;
    println("private:");
    tabs = 1;
    // generate the rule name array for debugging
    if ( grammar.debuggingOutput ) {
        println("static const char* _ruleNames[];");
    }
    // Generate the semantic predicate map for debugging
    if (grammar.debuggingOutput)
        println("static const char* _semPredNames[];");
    // Generate the bitsets used throughout the lexer
    genBitsetsHeader(bitsetsUsed, ((LexerGrammar)grammar).charVocabulary.size());
    tabs = 0;
    println("};");
    println("");
    if (nameSpace != null)
        nameSpace.emitClosures(currentOutput);
    // Generate a guard wrapper
    println("#endif /*INC_"+grammar.getClassName()+"_hpp_*/");
    // Close the lexer output stream
    currentOutput.close();
    currentOutput = null;
}
/** Generate the parser header file (MyParser.hpp): include guard, token-type
 *  include, the parser class declaration with its TokenBuffer/TokenStream
 *  constructors, token-name accessors, one declaration per user rule, the
 *  getAST() accessor, and the NUM_TOKENS/bitset members.
 *
 *  Side effects: opens currentOutput, sets genAST from grammar.buildAST,
 *  and closes currentOutput when done.
 *
 *  @param g the parser grammar (unused directly; state comes from 'grammar')
 *  @throws IOException if the output file cannot be opened/written
 */
public void genInclude(ParserGrammar g) throws IOException
{
    // Open the output stream for the parser and set the currentOutput
    outputFile = grammar.getClassName() + ".hpp";
    outputLine = 1;
    currentOutput = antlrTool.openOutputFile(outputFile);
    //SAS: changed for proper text file io
    genAST = grammar.buildAST;
    tabs = 0;
    // Generate a guard wrapper
    println("#ifndef INC_"+grammar.getClassName()+"_hpp_");
    println("#define INC_"+grammar.getClassName()+"_hpp_");
    println("");
    printHeaderAction(preIncludeHpp);
    println("#include <antlr/config.hpp>");
    // Generate the header common to all output files.
    genHeader(outputFile);
    // Generate header for the parser
    println("#include <antlr/TokenStream.hpp>");
    println("#include <antlr/TokenBuffer.hpp>");
    println("#include \"" + grammar.tokenManager.getName() + TokenTypesFileSuffix+".hpp\"");
    // Generate parser class definition
    String sup = null;
    if ( grammar.superClass != null ) {
        // User supplied a superclass: they must include its header themselves.
        sup = grammar.superClass;
        println("\n// Include correct superclass header with a header statement for example:");
        println("// header \"post_include_hpp\" {");
        println("// #include \""+sup+".hpp\"");
        println("// }");
        println("// Or....");
        println("// header {");
        println("// #include \""+sup+".hpp\"");
        println("// }\n");
    }
    else {
        // Default superclass: strip package prefix, include its header, then
        // qualify with the ANTLR namespace for the class declaration.
        sup = grammar.getSuperClass();
        if (sup.lastIndexOf('.') != -1)
            sup = sup.substring(sup.lastIndexOf('.')+1);
        println("#include <antlr/"+sup+".hpp>");
        sup = namespaceAntlr + sup;
    }
    println("");
    // Do not use printAction because we assume tabs==0
    printHeaderAction(postIncludeHpp);
    if (nameSpace != null)
        nameSpace.emitDeclarations(currentOutput);
    printHeaderAction("");
    // print javadoc comment if any
    if ( grammar.comment != null ) {
        _println(grammar.comment);
    }
    // generate the actual class definition
    print("class CUSTOM_API " + grammar.getClassName() + " : public " + sup);
    println(", public " + grammar.tokenManager.getName() + TokenTypesFileSuffix);
    Token tsuffix = (Token)grammar.options.get("classHeaderSuffix");
    if ( tsuffix != null ) {
        String suffix = StringUtils.stripFrontBack(tsuffix.getText(),"\"","\"");
        if ( suffix != null )
            print(", "+suffix); // must be an interface name for Java
    }
    println("{");
    // set up an array of all the rule names so the debugger can
    // keep track of them only by number -- less to store in tree...
    if (grammar.debuggingOutput) {
        println("public: static const char* _ruleNames[];");
    }
    // Generate user-defined parser class members
    if (grammar.classMemberAction != null) {
        genLineNo(grammar.classMemberAction.getLine());
        print(
            processActionForSpecialSymbols(grammar.classMemberAction.getText(),
                                           grammar.classMemberAction.getLine(),
                                           currentRule, null)
        );
        genLineNo2();
    }
    println("public:");
    tabs = 1;
    println("void initializeASTFactory( "+namespaceAntlr+"ASTFactory& factory );");
    // println("// called from constructors");
    // println("void _initialize( void );");
    // Generate parser class constructor from TokenBuffer
    tabs = 0;
    if( noConstructors )
    {
        // NOTE(review): "turned of" is a typo ("off") in the emitted comment;
        // left untouched here to keep generated output byte-identical.
        println("#if 0");
        println("// constructor creation turned of with 'noConstructor' option");
    }
    println("protected:");
    tabs = 1;
    println(grammar.getClassName() + "("+namespaceAntlr+"TokenBuffer& tokenBuf, int k);");
    tabs = 0;
    println("public:");
    tabs = 1;
    println(grammar.getClassName() + "("+namespaceAntlr+"TokenBuffer& tokenBuf);");
    // Generate parser class constructor from TokenStream
    tabs = 0;
    println("protected:");
    tabs = 1;
    println(grammar.getClassName()+"("+namespaceAntlr+"TokenStream& lexer, int k);");
    tabs = 0;
    println("public:");
    tabs = 1;
    println(grammar.getClassName()+"("+namespaceAntlr+"TokenStream& lexer);");
    println(grammar.getClassName()+"(const "+namespaceAntlr+"ParserSharedInputState& state);");
    if( noConstructors )
    {
        tabs = 0;
        println("// constructor creation turned of with 'noConstructor' option");
        println("#endif");
        tabs = 1;
    }
    println("int getNumTokens() const");
    println("{");
    tabs++;
    println("return "+grammar.getClassName()+"::NUM_TOKENS;");
    tabs--;
    println("}");
    // NOTE(review): emitted guard looks off-by-one — type == getNumTokens()
    // would still index tokenNames[type]; probably should be ">=". Left as-is
    // to keep generated output identical; confirm against tokenNames sizing.
    println("const char* getTokenName( int type ) const");
    println("{");
    tabs++;
    println("if( type > getNumTokens() ) return 0;");
    println("return "+grammar.getClassName()+"::tokenNames[type];");
    tabs--;
    println("}");
    println("const char* const* getTokenNames() const");
    println("{");
    tabs++;
    println("return "+grammar.getClassName()+"::tokenNames;");
    tabs--;
    println("}");
    // Generate code for each rule in the grammar
    Enumeration ids = grammar.rules.elements();
    while ( ids.hasMoreElements() ) {
        GrammarSymbol sym = (GrammarSymbol)ids.nextElement();
        if (sym instanceof RuleSymbol) {
            RuleSymbol rs = (RuleSymbol)sym;
            genRuleHeader(rs, rs.references.size()==0);
        }
        exitIfError();
    }
    // RK: when we are using a custom ast override Parser::getAST to return
    // the custom AST type. Ok, this does not work anymore with newer
    // compilers gcc 3.2.x and up. The reference counter is probably
    // getting in the way.
    // So now we just patch the return type back to RefAST
    tabs = 0;
    println("public:");
    tabs = 1;
    println(namespaceAntlr+"RefAST getAST()");
    println("{");
    if( usingCustomAST )
    {
        tabs++;
        println("return "+namespaceAntlr+"RefAST(returnAST);");
        tabs--;
    }
    else
    {
        tabs++;
        println("return returnAST;");
        tabs--;
    }
    println("}");
    println("");
    tabs = 0;
    println("protected:");
    tabs = 1;
    println(labeledElementASTType+" returnAST;");
    // Make the rest private
    tabs = 0;
    println("private:");
    tabs = 1;
    // Generate the token names
    println("static const char* tokenNames[];");
    // and how many there are of them
    _println("#ifndef NO_STATIC_CONSTS");
    println("static const int NUM_TOKENS = "+grammar.tokenManager.getVocabulary().size()+";");
    _println("#else");
    println("enum {");
    println("\tNUM_TOKENS = "+grammar.tokenManager.getVocabulary().size());
    println("};");
    _println("#endif");
    // Generate the bitsets used throughout the grammar
    genBitsetsHeader(bitsetsUsed, grammar.tokenManager.maxTokenType());
    // Generate the semantic predicate map for debugging
    if (grammar.debuggingOutput)
        println("static const char* _semPredNames[];");
    // Close class definition
    tabs = 0;
    println("};");
    println("");
    if (nameSpace != null)
        nameSpace.emitClosures(currentOutput);
    // Generate a guard wrapper
    println("#endif /*INC_"+grammar.getClassName()+"_hpp_*/");
    // Close the parser output stream
    currentOutput.close();
    currentOutput = null;
}
/** Generate the tree-parser header file (MyTreeParser.hpp): include guard,
 *  token-type include, the walker class declaration with its default
 *  constructor, token-name accessors, one declaration per user rule, the
 *  getAST() accessor, and the returnAST/_retTree/NUM_TOKENS/bitset members.
 *
 *  Fix: removed the local {@code ruleNameInits}, which was declared but
 *  never read or written after initialization (dead local).
 *
 *  Side effects: opens currentOutput, sets genAST from grammar.buildAST,
 *  and closes currentOutput when done.
 *
 *  @param g the tree-walker grammar (unused directly; state comes from 'grammar')
 *  @throws IOException if the output file cannot be opened/written
 */
public void genInclude(TreeWalkerGrammar g) throws IOException
{
    // Open the output stream for the parser and set the currentOutput
    outputFile = grammar.getClassName() + ".hpp";
    outputLine = 1;
    currentOutput = antlrTool.openOutputFile(outputFile);
    //SAS: changed for proper text file io
    genAST = grammar.buildAST;
    tabs = 0;
    // Generate a guard wrapper
    println("#ifndef INC_"+grammar.getClassName()+"_hpp_");
    println("#define INC_"+grammar.getClassName()+"_hpp_");
    println("");
    printHeaderAction(preIncludeHpp);
    println("#include <antlr/config.hpp>");
    println("#include \"" + grammar.tokenManager.getName() + TokenTypesFileSuffix+".hpp\"");
    // Generate the header common to all output files.
    genHeader(outputFile);
    // Find the name of the super class
    String sup = null;
    if ( grammar.superClass != null ) {
        // User supplied a superclass: they must include its header themselves.
        sup = grammar.superClass;
        println("\n// Include correct superclass header with a header statement for example:");
        println("// header \"post_include_hpp\" {");
        println("// #include \""+sup+".hpp\"");
        println("// }");
        println("// Or....");
        println("// header {");
        println("// #include \""+sup+".hpp\"");
        println("// }\n");
    }
    else {
        // Default superclass: strip package prefix, include its header, then
        // qualify with the ANTLR namespace for the class declaration.
        sup = grammar.getSuperClass();
        if (sup.lastIndexOf('.') != -1)
            sup = sup.substring(sup.lastIndexOf('.')+1);
        println("#include <antlr/"+sup+".hpp>");
        sup = namespaceAntlr + sup;
    }
    println("");
    // Generate header for the parser
    //
    // Do not use printAction because we assume tabs==0
    printHeaderAction(postIncludeHpp);
    if (nameSpace != null)
        nameSpace.emitDeclarations(currentOutput);
    printHeaderAction("");
    // print javadoc comment if any
    if ( grammar.comment != null ) {
        _println(grammar.comment);
    }
    // Generate parser class definition
    print("class CUSTOM_API " + grammar.getClassName() + " : public "+sup);
    println(", public " + grammar.tokenManager.getName() + TokenTypesFileSuffix);
    Token tsuffix = (Token)grammar.options.get("classHeaderSuffix");
    if ( tsuffix != null ) {
        String suffix = StringUtils.stripFrontBack(tsuffix.getText(),"\"","\"");
        if ( suffix != null ) {
            print(", "+suffix); // must be an interface name for Java
        }
    }
    println("{");
    // Generate user-defined parser class members
    if (grammar.classMemberAction != null) {
        genLineNo(grammar.classMemberAction.getLine());
        print(
            processActionForSpecialSymbols(grammar.classMemberAction.getText(),
                                           grammar.classMemberAction.getLine(),
                                           currentRule, null)
        );
        genLineNo2();
    }
    // Generate default parser class constructor
    tabs = 0;
    println("public:");
    if( noConstructors )
    {
        println("#if 0");
        println("// constructor creation turned of with 'noConstructor' option");
    }
    tabs = 1;
    println(grammar.getClassName() + "();");
    if( noConstructors )
    {
        tabs = 0;
        println("#endif");
        tabs = 1;
    }
    // Generate declaration for the initializeFactory method
    println("static void initializeASTFactory( "+namespaceAntlr+"ASTFactory& factory );");
    println("int getNumTokens() const");
    println("{");
    tabs++;
    println("return "+grammar.getClassName()+"::NUM_TOKENS;");
    tabs--;
    println("}");
    // NOTE(review): emitted guard looks off-by-one — type == getNumTokens()
    // still indexes tokenNames[type]; probably should be ">=". Left as-is to
    // keep generated output identical with the other backends.
    println("const char* getTokenName( int type ) const");
    println("{");
    tabs++;
    println("if( type > getNumTokens() ) return 0;");
    println("return "+grammar.getClassName()+"::tokenNames[type];");
    tabs--;
    println("}");
    println("const char* const* getTokenNames() const");
    println("{");
    tabs++;
    println("return "+grammar.getClassName()+"::tokenNames;");
    tabs--;
    println("}");
    // Generate code for each rule in the grammar
    Enumeration ids = grammar.rules.elements();
    while ( ids.hasMoreElements() ) {
        GrammarSymbol sym = (GrammarSymbol)ids.nextElement();
        if (sym instanceof RuleSymbol) {
            RuleSymbol rs = (RuleSymbol)sym;
            genRuleHeader(rs, rs.references.size()==0);
        }
        exitIfError();
    }
    // getAST() is patched back to RefAST (see the parser version for why).
    tabs = 0;
    println("public:");
    tabs = 1;
    println(namespaceAntlr+"RefAST getAST()");
    println("{");
    if( usingCustomAST )
    {
        tabs++;
        println("return "+namespaceAntlr+"RefAST(returnAST);");
        tabs--;
    }
    else
    {
        tabs++;
        println("return returnAST;");
        tabs--;
    }
    println("}");
    println("");
    tabs = 0;
    println("protected:");
    tabs = 1;
    println(labeledElementASTType+" returnAST;");
    println(labeledElementASTType+" _retTree;");
    // Make the rest private
    tabs = 0;
    println("private:");
    tabs = 1;
    // Generate the token names
    println("static const char* tokenNames[];");
    // and how many there are of them
    _println("#ifndef NO_STATIC_CONSTS");
    println("static const int NUM_TOKENS = "+grammar.tokenManager.getVocabulary().size()+";");
    _println("#else");
    println("enum {");
    println("\tNUM_TOKENS = "+grammar.tokenManager.getVocabulary().size());
    println("};");
    _println("#endif");
    // Generate the bitsets used throughout the grammar
    genBitsetsHeader(bitsetsUsed, grammar.tokenManager.maxTokenType());
    // Close class definition
    tabs = 0;
    println("};");
    println("");
    if (nameSpace != null)
        nameSpace.emitClosures(currentOutput);
    // Generate a guard wrapper
    println("#endif /*INC_"+grammar.getClassName()+"_hpp_*/");
    // Close the parser output stream
    currentOutput.close();
    currentOutput = null;
}
/// for convenience: declare the AST variable for el using the element's own
/// label and the grammar-wide default AST node type.
protected void genASTDeclaration(AlternativeElement el) {
    genASTDeclaration(el, labeledElementASTType);
}
/// for convenience: declare the AST variable for el named after its label,
/// with an explicit node type.
protected void genASTDeclaration(AlternativeElement el, String node_type) {
    genASTDeclaration(el, el.getLabel(), node_type);
}
/// Generate (if not already done) a declaration for the AST for el.
/// Emits "node_type var_name_AST = init;" exactly once per element;
/// atoms with a custom AST node type get a Ref<Type>(...) initializer.
protected void genASTDeclaration(AlternativeElement el, String var_name, String node_type) {
    // already declared?
    if (declaredASTVariables.contains(el))
        return;

    // Default initializer, replaced when the atom carries its own node type.
    String initializer = labeledElementASTInit;
    if (el instanceof GrammarAtom) {
        String astNodeType = ((GrammarAtom) el).getASTNodeType();
        if (astNodeType != null) {
            initializer = "Ref" + astNodeType + "(" + labeledElementASTInit + ")";
        }
    }

    // emit code
    println(node_type + " " + var_name + "_AST = " + initializer + ";");

    // mark as declared
    declaredASTVariables.put(el, el);
}
/** Emit the literals-table test that remaps a matched token's type when its
 *  full text is a keyword/literal. */
private void genLiteralsTest() {
    println("_ttype = testLiteralsTable(_ttype);");
}
/** Emit a literals-table test over only the text matched since _begin —
 *  used when just part of the token buffer belongs to the current token. */
private void genLiteralsTestForPartialToken() {
    println("_ttype = testLiteralsTable(text.substr(_begin, text.length()-_begin),_ttype);");
}
/** Match against a bitset: intentionally a no-op in this backend — the body
 *  is empty in the original. NOTE(review): presumably set matches are emitted
 *  through other paths; confirm before adding behavior here. */
protected void genMatch(BitSet b) {
}
/** Dispatch atom matching by atom kind.
 *  The instanceof tests are checked in the original's order — do not
 *  reorder, since the atom classes may be related by inheritance.
 */
protected void genMatch(GrammarAtom atom) {
    if (atom instanceof StringLiteralElement) {
        // Lexers match string literals by text; parsers by token type.
        if (grammar instanceof LexerGrammar) {
            genMatchUsingAtomText(atom);
        } else {
            genMatchUsingAtomTokenType(atom);
        }
        return;
    }
    if (atom instanceof CharLiteralElement) {
        // Lexer case is handled in the gen( CharLiteralElement x )
        antlrTool.error("cannot ref character literals in grammar: " + atom);
        return;
    }
    if (atom instanceof TokenRefElement) {
        genMatchUsingAtomTokenType(atom);
        return;
    }
    if (atom instanceof WildcardElement) {
        gen((WildcardElement) atom);
    }
}
/** Emit a match()/matchNot() call for an atom using its literal text.
 *  Tree walkers get the _t cursor prepended to the arguments; lexers get
 *  their literal converted to C++ string syntax, and — when the matched
 *  text must not be kept — the match is bracketed with a save/erase of the
 *  text buffer.
 *
 *  Fix: the lexer save/erase condition was written out twice; it is now
 *  computed once so the two guards can never drift apart.
 *
 *  @param atom the grammar atom to match
 */
protected void genMatchUsingAtomText(GrammarAtom atom) {
    // match() for trees needs the _t cursor
    String astArgs = "";
    if (grammar instanceof TreeWalkerGrammar) {
        if (usingCustomAST)
            astArgs = namespaceAntlr + "RefAST" + "(_t),";
        else
            astArgs = "_t,";
    }

    // if in lexer and ! on element (or !saveText), the consumed text must be
    // dropped: save the buffer index now and erase back to it after matching.
    final boolean discardMatchedText =
        grammar instanceof LexerGrammar &&
        (!saveText || atom.getAutoGenType() == GrammarElement.AUTO_GEN_BANG);

    if (discardMatchedText) {
        println("_saveIndex = text.length();");
    }

    print(atom.not ? "matchNot(" : "match(");
    _print(astArgs);

    // print out what to match
    if (atom.atomText.equals("EOF")) {
        // horrible hack to handle EOF case
        _print(namespaceAntlr + "Token::EOF_TYPE");
    }
    else {
        if (grammar instanceof LexerGrammar) // lexer needs special handling
        {
            String cppstring = convertJavaToCppString(atom.atomText, false);
            _print(cppstring);
        }
        else
            _print(atom.atomText);
    }
    _println(");");

    if (discardMatchedText) {
        println("text.erase(_saveIndex);"); // kill text atom put in buffer
    }
}
/** Emit a match()/matchNot() call for an atom using its token type
 *  (symbolic constant); tree walkers get the _t cursor prepended.
 */
protected void genMatchUsingAtomTokenType(GrammarAtom atom) {
    // match() for trees needs the _t cursor
    String cursorArg = "";
    if (grammar instanceof TreeWalkerGrammar) {
        cursorArg = usingCustomAST
            ? namespaceAntlr + "RefAST" + "(_t),"
            : "_t,";
    }
    // If the literal can be mangled, generate the symbolic constant instead
    String args = cursorArg + getValueString(atom.getType());
    String call = atom.not ? "matchNot(" : "match(";
    println(call + args + ");");
}
/** Generate the nextToken() rule.
 *  nextToken() is a synthetic lexer rule that is the implicit OR of all
 *  user-defined lexer rules. If no public rules exist, a stub returning
 *  EOF is emitted instead. Handles filter mode (with or without a filter
 *  rule), literals testing, and the lexical error catch clauses.
 *
 *  Fix: dropped the dead local {@code boolean ok} that captured (and never
 *  used) the determinism result; the analyzer call itself is kept for its
 *  side effects on the rule block.
 *
 *  Side effects: defines the synthetic "mnextToken" rule in the grammar.
 */
public void genNextToken() {
    // Are there any public rules? If not, then just generate a
    // fake nextToken().
    boolean hasPublicRules = false;
    for (int i = 0; i < grammar.rules.size(); i++) {
        RuleSymbol rs = (RuleSymbol)grammar.rules.elementAt(i);
        if (rs.isDefined() && rs.access.equals("public")) {
            hasPublicRules = true;
            break;
        }
    }
    if (!hasPublicRules) {
        println("");
        println(namespaceAntlr+"RefToken "+grammar.getClassName()+"::nextToken() { return "+namespaceAntlr+"RefToken(new "+namespaceAntlr+"CommonToken("+namespaceAntlr+"Token::EOF_TYPE, \"\")); }");
        println("");
        return;
    }
    // Create the synthesized nextToken() rule
    RuleBlock nextTokenBlk = MakeGrammar.createNextTokenRule(grammar, grammar.rules, "nextToken");
    // Define the nextToken rule symbol
    RuleSymbol nextTokenRs = new RuleSymbol("mnextToken");
    nextTokenRs.setDefined();
    nextTokenRs.setBlock(nextTokenBlk);
    nextTokenRs.access = "private";
    grammar.define(nextTokenRs);
    // Analyze the nextToken rule (call kept for its side effects;
    // the boolean result was never used).
    grammar.theLLkAnalyzer.deterministic(nextTokenBlk);
    // Generate the next token rule
    String filterRule = null;
    if ( ((LexerGrammar)grammar).filterMode ) {
        filterRule = ((LexerGrammar)grammar).filterRule;
    }
    println("");
    println(namespaceAntlr+"RefToken "+grammar.getClassName()+"::nextToken()");
    println("{");
    tabs++;
    // NOTE(review): theRetToken is declared both here and again inside the
    // for loop below — the inner declaration shadows this one in the
    // generated C++. Preserved to keep generated output identical.
    println(namespaceAntlr+"RefToken theRetToken;");
    println("for (;;) {");
    tabs++;
    println(namespaceAntlr+"RefToken theRetToken;");
    println("int _ttype = "+namespaceAntlr+"Token::INVALID_TYPE;");
    if ( ((LexerGrammar)grammar).filterMode ) {
        println("setCommitToPath(false);");
        if ( filterRule != null ) {
            // Here's a good place to ensure that the filter rule actually exists
            if ( !grammar.isDefined(CodeGenerator.encodeLexerRuleName(filterRule)) ) {
                grammar.antlrTool.error("Filter rule "+filterRule+" does not exist in this lexer");
            }
            else {
                RuleSymbol rs = (RuleSymbol)grammar.getSymbol(CodeGenerator.encodeLexerRuleName(filterRule));
                if ( !rs.isDefined() ) {
                    grammar.antlrTool.error("Filter rule "+filterRule+" does not exist in this lexer");
                }
                else if ( rs.access.equals("public") ) {
                    grammar.antlrTool.error("Filter rule "+filterRule+" must be protected");
                }
            }
            // Mark the input so the filter rule can rewind on failure.
            println("int _m;");
            println("_m = mark();");
        }
    }
    println("resetText();");
    // Generate try around whole thing to trap scanner errors
    println("try {   // for lexical and char stream error handling");
    tabs++;
    // Test for public lexical rules with empty paths
    for (int i = 0; i < nextTokenBlk.getAlternatives().size(); i++) {
        Alternative a = nextTokenBlk.getAlternativeAt(i);
        if ( a.cache[1].containsEpsilon() ) {
            antlrTool.warning("found optional path in nextToken()");
        }
    }
    // Generate the block
    String newline = System.getProperty("line.separator");
    CppBlockFinishingInfo howToFinish = genCommonBlock(nextTokenBlk, false);
    // Error epilogue: EOF handling plus the filter-mode fallback.
    String errFinish = "if (LA(1)==EOF_CHAR)"+newline+
        "\t\t\t\t{"+newline+"\t\t\t\t\tuponEOF();"+newline+
        "\t\t\t\t\t_returnToken = makeToken("+namespaceAntlr+"Token::EOF_TYPE);"+
        newline+"\t\t\t\t}";
    errFinish += newline+"\t\t\t\t";
    if ( ((LexerGrammar)grammar).filterMode ) {
        if ( filterRule == null ) {
            errFinish += "else {consume(); goto tryAgain;}";
        }
        else {
            errFinish += "else {"+newline+
                "\t\t\t\t\tcommit();"+newline+
                "\t\t\t\t\ttry {m"+filterRule+"(false);}"+newline+
                "\t\t\t\t\tcatch("+namespaceAntlr+"RecognitionException& e) {"+newline+
                "\t\t\t\t\t // catastrophic failure"+newline+
                "\t\t\t\t\t reportError(e);"+newline+
                "\t\t\t\t\t consume();"+newline+
                "\t\t\t\t\t}"+newline+
                "\t\t\t\t\tgoto tryAgain;"+newline+
                "\t\t\t\t}";
        }
    }
    else {
        errFinish += "else {"+throwNoViable+"}";
    }
    genBlockFinish(howToFinish, errFinish);
    // at this point a valid token has been matched, undo "mark" that was done
    if ( ((LexerGrammar)grammar).filterMode && filterRule != null ) {
        println("commit();");
    }
    // Generate literals test if desired
    // make sure _ttype is set first; note _returnToken must be
    // non-null as the rule was required to create it.
    println("if ( !_returnToken )"+newline+"\t\t\t\tgoto tryAgain; // found SKIP token"+newline);
    println("_ttype = _returnToken->getType();");
    if ( ((LexerGrammar)grammar).getTestLiterals()) {
        genLiteralsTest();
    }
    // return token created by rule reference in switch
    println("_returnToken->setType(_ttype);");
    println("return _returnToken;");
    // Close try block
    tabs--;
    println("}");
    println("catch ("+namespaceAntlr+"RecognitionException& e) {");
    tabs++;
    if ( ((LexerGrammar)grammar).filterMode ) {
        if ( filterRule == null ) {
            // No filter rule: skip one char and retry unless committed.
            println("if ( !getCommitToPath() ) {");
            tabs++;
            println("consume();");
            println("goto tryAgain;");
            tabs--;
            println("}");
        }
        else {
            // Filter rule present: rewind and let it consume the bad input.
            println("if ( !getCommitToPath() ) {");
            tabs++;
            println("rewind(_m);");
            println("resetText();");
            println("try {m"+filterRule+"(false);}");
            println("catch("+namespaceAntlr+"RecognitionException& ee) {");
            println("   // horrendous failure: error in filter rule");
            println("   reportError(ee);");
            println("   consume();");
            println("}");
            tabs--;
            println("}");
            println("else");
        }
    }
    if ( nextTokenBlk.getDefaultErrorHandler() ) {
        println("{");
        tabs++;
        println("reportError(e);");
        println("consume();");
        tabs--;
        println("}");
    }
    else {
        // pass on to invoking routine
        tabs++;
        println("throw "+namespaceAntlr+"TokenStreamRecognitionException(e);");
        tabs--;
    }
    // close CharStreamException try
    tabs--;
    println("}");
    println("catch ("+namespaceAntlr+"CharStreamIOException& csie) {");
    println("\tthrow "+namespaceAntlr+"TokenStreamIOException(csie.io);");
    println("}");
    println("catch ("+namespaceAntlr+"CharStreamException& cse) {");
    println("\tthrow "+namespaceAntlr+"TokenStreamException(cse.getMessage());");
    println("}");
    // close for-loop
    _println("tryAgain:;");
    tabs--;
    println("}");
    // close method nextToken
    tabs--;
    println("}");
    println("");
}
/** Gen a named rule block.
 * ASTs are generated for each element of an alternative unless
 * the rule or the alternative have a '!' modifier.
 *
 * If an alternative defeats the default tree construction, it
 * must set <rule>_AST to the root of the returned AST.
 *
 * Each alternative that does automatic tree construction, builds
 * up root and child list pointers in an ASTPair structure.
 *
 * A rule finishes by setting the returnAST variable from the
 * ASTPair.
 *
 * @param rule The name of the rule to generate
 * @param startSymbol true if the rule is a start symbol (i.e., not referenced elsewhere)
 */
public void genRule(RuleSymbol s, boolean startSymbol, int ruleNum, String prefix) {
    // tabs=1; // JavaCodeGenerator needs this
    if ( DEBUG_CODE_GENERATOR || DEBUG_CPP_CODE_GENERATOR )
        System.out.println("genRule("+ s.getId() +")");
    if ( !s.isDefined() ) {
        antlrTool.error("undefined rule: "+ s.getId());
        return;
    }

    // Generate rule return type, name, arguments
    RuleBlock rblk = s.getBlock();

    // Track the rule (and its id) currently being generated; used by
    // mapTreeId and the AST-construction helpers.
    currentRule = rblk;
    currentASTResult = s.getId();

    // clear list of declared ast variables..
    declaredASTVariables.clear();

    // Save the AST generation state, and set it to that of the rule
    boolean savegenAST = genAST;
    genAST = genAST && rblk.getAutoGen();

    // boolean oldsaveTest = saveText;
    saveText = rblk.getAutoGen();

    // print javadoc comment if any
    if ( s.comment!=null ) {
        _println(s.comment);
    }

    // Gen method return type (note lexer return action set at rule creation)
    if (rblk.returnAction != null)
    {
        // Has specified return value
        _print(extractTypeOfAction(rblk.returnAction, rblk.getLine(), rblk.getColumn()) + " ");
    } else {
        // No specified return value
        _print("void ");
    }

    // Gen method name
    _print(prefix + s.getId() + "(");

    // Additional rule parameters common to all rules for this grammar
    _print(commonExtraParams);
    if (commonExtraParams.length() != 0 && rblk.argAction != null ) {
        _print(",");
    }

    // Gen arguments
    if (rblk.argAction != null)
    {
        // Has specified arguments
        _println("");
        // FIXME: make argAction also a token? Hmmmmm
        // genLineNo(rblk);
        tabs++;

        // Process arguments for default arguments
        // newer gcc's don't accept these in two places (header/cpp)
        //
        // Old appraoch with StringBuffer gave trouble with gcj.
        //
        // RK: Actually this breaks with string default arguments containing
        // a comma's or equal signs. Then again the old StringBuffer method
        // suffered from the same.
        //
        // The loop below strips " = default" initializers from the argument
        // list (they may only appear in the header declaration), splitting
        // naively on ',' and '='.
        String oldarg = rblk.argAction;
        String newarg = "";
        String comma = "";
        int eqpos = oldarg.indexOf( '=' );
        if( eqpos != -1 )
        {
            int cmpos = 0;
            while( cmpos != -1 && eqpos != -1 )
            {
                // keep everything up to the '=' (the declarator), drop the default
                newarg = newarg + comma + oldarg.substring( 0, eqpos ).trim();
                comma = ", ";
                cmpos = oldarg.indexOf( ',', eqpos );
                if( cmpos != -1 )
                {
                    // cut off part we just handled
                    oldarg = oldarg.substring( cmpos+1 ).trim();
                    eqpos = oldarg.indexOf( '=' );
                    if( eqpos == -1 )
                        newarg = newarg+comma+oldarg;
                }
            }
        }
        else
            newarg = oldarg;

        println( newarg );

        // println(rblk.argAction);
        tabs--;
        print(") ");
        // genLineNo2(); // gcc gives error on the brace... hope it works for the others too
    } else {
        // No specified arguments
        _print(") ");
    }
    _println("{");
    tabs++;

    // Emit a Tracer guard object if rule tracing was requested.
    if (grammar.traceRules) {
        if ( grammar instanceof TreeWalkerGrammar ) {
            if ( usingCustomAST )
                println("Tracer traceInOut(this,\""+ s.getId() +"\","+ namespaceAntlr+"RefAST"+"(_t));");
            else
                println("Tracer traceInOut(this,\""+ s.getId() +"\",_t);");
        }
        else {
            println("Tracer traceInOut(this, \""+ s.getId() +"\");");
        }
    }

    // Convert return action to variable declaration
    if (rblk.returnAction != null)
    {
        genLineNo(rblk);
        println(rblk.returnAction + ";");
        genLineNo2();
    }

    // print out definitions needed by rules for various grammar types
    if (!commonLocalVars.equals(""))
        println(commonLocalVars);

    if ( grammar instanceof LexerGrammar ) {
        // RK: why is this here? It seems not supported in the rest of the
        // tool.
        // lexer rule default return value is the rule's token name
        // This is a horrible hack to support the built-in EOF lexer rule.
        if (s.getId().equals("mEOF"))
            println("_ttype = "+ namespaceAntlr+"Token::EOF_TYPE;");
        else
            // strip the leading 'm' from the method name to get the token name
            println("_ttype = "+ s.getId().substring(1)+";");

        println(namespaceStd+"string::size_type _saveIndex;"); // used for element! (so we can kill text matched for element)
        /*
        println("boolean old_saveConsumedInput=saveConsumedInput;");
        if ( !rblk.getAutoGen() ) { // turn off "save input" if ! on rule
            println("saveConsumedInput=false;");
        }
        */
    }

    // if debugging, write code to mark entry to the rule
    if ( grammar.debuggingOutput)
        if (grammar instanceof ParserGrammar)
            println("fireEnterRule(" + ruleNum + ",0);");
        else if (grammar instanceof LexerGrammar)
            println("fireEnterRule(" + ruleNum + ",_ttype);");

    // Generate trace code if desired
    // if ( grammar.debuggingOutput || grammar.traceRules) {
    // println("try { // debugging");
    // tabs++;
    // }

    // Initialize AST variables
    if (grammar instanceof TreeWalkerGrammar) {
        // "Input" value for rule
        // println(labeledElementASTType+" " + s.getId() + "_AST_in = "+labeledElementASTType+"(_t);");
        println(labeledElementASTType+" " + s.getId() + "_AST_in = (_t == "+ labeledElementASTType+"(ASTNULL)) ? "+ labeledElementASTInit+" : _t;");
    }
    if (grammar.buildAST) {
        // Parser member used to pass AST returns from rule invocations
        println("returnAST = "+ labeledElementASTInit+";");
        // Tracks AST construction
        println(namespaceAntlr+"ASTPair currentAST;"); // = new ASTPair();");
        // User-settable return value for rule.
        println(labeledElementASTType+" " + s.getId() + "_AST = "+ labeledElementASTInit+";");
    }

    genBlockPreamble(rblk);
    genBlockInitAction(rblk);
    println("");

    // Search for an unlabeled exception specification attached to the rule
    ExceptionSpec unlabeledUserSpec = rblk.findExceptionSpec("");

    // Generate try block around the entire rule for error handling
    if (unlabeledUserSpec != null || rblk.getDefaultErrorHandler() ) {
        println("try { // for error handling");
        tabs++;
    }

    // Generate the alternatives
    if ( rblk.alternatives.size()==1 )
    {
        // One alternative -- use simple form
        Alternative alt = rblk.getAlternativeAt(0);
        String pred = alt.semPred;
        if ( pred!=null )
            genSemPred(pred, currentRule.line);
        if (alt.synPred != null) {
            antlrTool.warning(
                "Syntactic predicate ignored for single alternative",
                grammar.getFilename(),
                alt.synPred.getLine(),
                alt.synPred.getColumn()
            );
        }
        genAlt(alt, rblk);
    }
    else
    {
        // Multiple alternatives -- generate complex form
        // NOTE: 'ok' is unused; deterministic() is called for its side
        // effects (ambiguity warnings) on the analyzer.
        boolean ok = grammar.theLLkAnalyzer.deterministic(rblk);

        CppBlockFinishingInfo howToFinish = genCommonBlock(rblk, false);
        genBlockFinish(howToFinish, throwNoViable);
    }

    // Generate catch phrase for error handling
    if (unlabeledUserSpec != null || rblk.getDefaultErrorHandler() ) {
        // Close the try block
        tabs--;
        println("}");
    }

    // Generate user-defined or default catch phrases
    if (unlabeledUserSpec != null)
    {
        genErrorHandler(unlabeledUserSpec);
    }
    else if (rblk.getDefaultErrorHandler())
    {
        // Generate default catch phrase
        println("catch (" + exceptionThrown + "& ex) {");
        tabs++;
        // Generate code to handle error if not guessing
        if (grammar.hasSyntacticPredicate) {
            println("if( inputState->guessing == 0 ) {");
            tabs++;
        }
        println("reportError(ex);");
        if ( !(grammar instanceof TreeWalkerGrammar) )
        {
            // Generate code to consume until token in k==1 follow set
            Lookahead follow = grammar.theLLkAnalyzer.FOLLOW(1, rblk.endNode);
            String followSetName = getBitsetName(markBitsetForGen(follow.fset));
            println("recover(ex," + followSetName + ");");
        }
        else
        {
            // Just consume one token
            println("if ( _t != "+ labeledElementASTInit+" )");
            tabs++;
            println("_t = _t->getNextSibling();");
            tabs--;
        }
        if (grammar.hasSyntacticPredicate)
        {
            tabs--;
            // When guessing, rethrow exception
            println("} else {");
            tabs++;
            println("throw;");
            tabs--;
            println("}");
        }
        // Close catch phrase
        tabs--;
        println("}");
    }

    // Squirrel away the AST "return" value
    if (grammar.buildAST) {
        println("returnAST = " + s.getId() + "_AST;");
    }

    // Set return tree value for tree walkers
    if ( grammar instanceof TreeWalkerGrammar ) {
        println("_retTree = _t;");
    }

    // Generate literals test for lexer rules so marked
    if (rblk.getTestLiterals()) {
        if ( s.access.equals("protected") ) {
            genLiteralsTestForPartialToken();
        }
        else {
            genLiteralsTest();
        }
    }

    // if doing a lexer rule, dump code to create token if necessary
    if ( grammar instanceof LexerGrammar ) {
        println("if ( _createToken && _token=="+ namespaceAntlr+"nullToken && _ttype!="+ namespaceAntlr+"Token::SKIP ) {");
        println(" _token = makeToken(_ttype);");
        println(" _token->setText(text.substr(_begin, text.length()-_begin));");
        println("}");
        println("_returnToken = _token;");
        // It should be easy for an optimizing compiler to realize this does nothing
        // but it avoids the warning about the variable being unused.
        println("_saveIndex=0;");
    }

    // Gen the return statement if there is one (lexer has hard-wired return action)
    if (rblk.returnAction != null) {
        println("return " + extractIdOfAction(rblk.returnAction, rblk.getLine(), rblk.getColumn()) + ";");
    }

    // if ( grammar.debuggingOutput || grammar.traceRules) {
    //// tabs--;
    //// println("} finally { // debugging");
    //// tabs++;
    //
    // // Generate trace code if desired
    // if ( grammar.debuggingOutput)
    // if (grammar instanceof ParserGrammar)
    // println("fireExitRule(" + ruleNum + ",0);");
    // else if (grammar instanceof LexerGrammar)
    // println("fireExitRule(" + ruleNum + ",_ttype);");
    //
    //// if (grammar.traceRules) {
    //// if ( grammar instanceof TreeWalkerGrammar ) {
    //// println("traceOut(\""+ s.getId() +"\",_t);");
    //// }
    //// else {
    //// println("traceOut(\""+ s.getId() +"\");");
    //// }
    //// }
    ////
    //// tabs--;
    //// println("}");
    // }

    tabs--;
    println("}");
    println("");

    // Restore the AST generation state
    genAST = savegenAST;

    // restore char save state
    // saveText = oldsaveTest;
}
/** Generate the C++ header (declaration) for a rule: access specifier,
 * return type, method name and parameter list, terminated by ';'.
 * Mirrors the signature logic of genRule() but emits no body and does
 * not strip default-argument initializers (they belong in the header).
 *
 * @param s           the rule symbol to declare
 * @param startSymbol true if the rule is a start symbol (currently unused here)
 */
public void genRuleHeader(RuleSymbol s, boolean startSymbol) {
    tabs=1;
    if ( DEBUG_CODE_GENERATOR || DEBUG_CPP_CODE_GENERATOR )
        System.out.println("genRuleHeader("+ s.getId() +")");
    if ( !s.isDefined() ) {
        antlrTool.error("undefined rule: "+ s.getId());
        return;
    }

    // Generate rule return type, name, arguments
    RuleBlock rblk = s.getBlock();
    currentRule = rblk;
    currentASTResult = s.getId();

    // Save the AST generation state, and set it to that of the rule
    boolean savegenAST = genAST;
    genAST = genAST && rblk.getAutoGen();

    // boolean oldsaveTest = saveText;
    saveText = rblk.getAutoGen();

    // Gen method access (e.g. "public: ")
    print(s.access + ": ");

    // Gen method return type (note lexer return action set at rule creation)
    if (rblk.returnAction != null)
    {
        // Has specified return value
        _print(extractTypeOfAction(rblk.returnAction, rblk.getLine(), rblk.getColumn()) + " ");
    } else {
        // No specified return value
        _print("void ");
    }

    // Gen method name
    _print(s.getId() + "(");

    // Additional rule parameters common to all rules for this grammar
    _print(commonExtraParams);
    if (commonExtraParams.length() != 0 && rblk.argAction != null ) {
        _print(",");
    }

    // Gen arguments
    if (rblk.argAction != null)
    {
        // Has specified arguments
        _println("");
        tabs++;
        println(rblk.argAction);
        tabs--;
        print(")");
    } else {
        // No specified arguments
        _print(")");
    }
    _println(";");
    tabs--;

    // Restore the AST generation state
    genAST = savegenAST;

    // restore char save state
    // saveText = oldsaveTest;
}
/** Generate the argument list and call of a rule reference, e.g.
 * "targetRule(true, args);".  Assumes the caller has already emitted
 * any assignment of the return value.  For lexer grammars a leading
 * boolean tells the invoked rule whether to create a _returnToken
 * (true when the reference is labeled).  For tree walkers, code is
 * emitted to advance _t after the call.
 *
 * @param rr the rule reference element being generated
 */
private void GenRuleInvocation(RuleRefElement rr) {
    // dump rule name
    _print(rr.targetRule + "(");

    // lexers must tell rule if it should set _returnToken
    if ( grammar instanceof LexerGrammar ) {
        // if labeled, could access Token, so tell rule to create
        if ( rr.getLabel() != null ) {
            _print("true");
        }
        else {
            _print("false");
        }
        if (commonExtraArgs.length() != 0 || rr.args!=null ) {
            _print(",");
        }
    }

    // Extra arguments common to all rules for this grammar
    _print(commonExtraArgs);
    if (commonExtraArgs.length() != 0 && rr.args!=null ) {
        _print(",");
    }

    // Process arguments to method, if any
    RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rr.targetRule);
    if (rr.args != null)
    {
        // When not guessing, execute user arg action
        ActionTransInfo tInfo = new ActionTransInfo();
        // FIXME: fix line number passed to processActionForTreeSpecifiers here..
        // this one might be a bit off..
        String args = processActionForSpecialSymbols(rr.args, rr.line, currentRule, tInfo);
        // Arguments must not contain tree-construction directives (#rule refs)
        if ( tInfo.assignToRoot || tInfo.refRuleRoot!=null )
        {
            antlrTool.error("Arguments of rule reference '" + rr.targetRule + "' cannot set or ref #"+ currentRule.getRuleName()+" on line "+ rr.getLine());
        }
        _print(args);

        // Warn if the rule accepts no arguments
        if (rs.block.argAction == null)
        {
            antlrTool.warning("Rule '" + rr.targetRule + "' accepts no arguments", grammar.getFilename(), rr.getLine(), rr.getColumn());
        }
    }
    else
    {
        // For C++, no warning if rule has parameters, because there may be default
        // values for all of the parameters
        //if (rs.block.argAction != null) {
        //	tool.warning("Missing parameters on reference to rule "+rr.targetRule, rr.getLine());
        //}
    }
    _println(");");

    // move down to the first child while parsing
    if ( grammar instanceof TreeWalkerGrammar ) {
        println("_t = _retTree;");
    }
}
/** Generate the validation test for a semantic predicate: an if-test
 * that throws a SemanticException (carrying the escaped predicate text)
 * when the predicate fails.  When debugging a parser or lexer, the
 * predicate is additionally wrapped in a call that notifies
 * SemanticPredicateListeners of the evaluation result.
 *
 * @param pred the predicate source text (before $/# translation)
 * @param line source line of the predicate, for action translation
 */
protected void genSemPred(String pred, int line) {
    // translate $ and # references
    ActionTransInfo tInfo = new ActionTransInfo();
    pred = processActionForSpecialSymbols(pred, line, currentRule, tInfo);
    // ignore translation info...we don't need to do anything with it.

    // escaped version used both for the debug event and the exception message
    String escapedPred = charFormatter.escapeString(pred);

    // if debugging, wrap the semantic predicate evaluation in a method
    // that can tell SemanticPredicateListeners the result
    if (grammar.debuggingOutput && ((grammar instanceof ParserGrammar) || (grammar instanceof LexerGrammar)))
        pred = "fireSemanticPredicateEvaluated(antlr.debug.SemanticPredicateEvent.VALIDATING," //FIXME
            + addSemPred(escapedPred) + "," + pred + ")";

    println("if (!(" + pred + "))");
    tabs++;
    println("throw "+ namespaceAntlr+"SemanticException(\"" + escapedPred + "\");");
    tabs--;
}
/** Write an array of Strings which are the semantic predicate
 * expressions. The debugger will reference them by number only.
 *
 * @param prefix prepended to the generated array's name
 */
protected void genSemPredMap(String prefix) {
    println("const char* " + prefix + "_semPredNames[] = {");
    tabs++;
    // One quoted entry per registered predicate, in registration order.
    for (Enumeration preds = semPreds.elements(); preds.hasMoreElements(); ) {
        println("\"" + preds.nextElement() + "\",");
    }
    // null-pointer sentinel terminates the array
    println("0");
    tabs--;
    println("};");
}
/** Generate the guess-mode code for a syntactic predicate: declare a
 * synPredMatched<ID> flag, save the input position (mark() for
 * parsers/lexers, the _t pointer for tree walkers), attempt the
 * predicate block inside a try with inputState->guessing incremented,
 * restore the input position, and finally open the "if (matched)"
 * block that the caller will fill and close.
 *
 * @param blk           the syntactic predicate block
 * @param lookaheadExpr the k=1..n lookahead test guarding the attempt
 */
protected void genSynPred(SynPredBlock blk, String lookaheadExpr) {
    if ( DEBUG_CODE_GENERATOR || DEBUG_CPP_CODE_GENERATOR )
        System.out.println("gen=>("+ blk+")");

    // Dump synpred result variable
    println("bool synPredMatched" + blk.ID + " = false;");

    // Tree walkers: normalize a null tree to ASTNULL before testing it
    if( grammar instanceof TreeWalkerGrammar ) {
        println("if (_t == "+ labeledElementASTInit+" )");
        tabs++;
        println("_t = ASTNULL;");
        tabs--;
    }

    // Gen normal lookahead test
    println("if (" + lookaheadExpr + ") {");
    tabs++;

    // Save input state
    if ( grammar instanceof TreeWalkerGrammar ) {
        println(labeledElementType + " __t" + blk.ID + " = _t;");
    }
    else {
        println("int _m" + blk.ID + " = mark();");
    }

    // Once inside the try, assume synpred works unless exception caught
    println("synPredMatched" + blk.ID + " = true;");
    println("inputState->guessing++;");

    // if debugging, tell listeners that a synpred has started
    if (grammar.debuggingOutput && ((grammar instanceof ParserGrammar) || (grammar instanceof LexerGrammar))) {
        println("fireSyntacticPredicateStarted();");
    }

    syntacticPredLevel++;
    println("try {");
    tabs++;
    gen((AlternativeBlock)blk); // gen code to test predicate
    tabs--;
    //println("System.out.println(\"pred "+blk+" succeeded\");");
    println("}");
    // Any recognition error means the predicate failed
    println("catch (" + exceptionThrown + "& pe) {");
    tabs++;
    println("synPredMatched"+ blk.ID+" = false;");
    //println("System.out.println(\"pred "+blk+" failed\");");
    tabs--;
    println("}");

    // Restore input state
    if ( grammar instanceof TreeWalkerGrammar ) {
        println("_t = __t"+ blk.ID+";");
    }
    else {
        println("rewind(_m"+ blk.ID+");");
    }

    println("inputState->guessing--;");

    // if debugging, tell listeners how the synpred turned out
    if (grammar.debuggingOutput && ((grammar instanceof ParserGrammar) || (grammar instanceof LexerGrammar))) {
        println("if (synPredMatched" + blk.ID +")");
        println(" fireSyntacticPredicateSucceeded();");
        println("else");
        println(" fireSyntacticPredicateFailed();");
    }

    syntacticPredLevel--;
    tabs--;

    // Close lookahead test
    println("}");

    // Test synpred result -- the caller generates the matching close brace
    println("if ( synPredMatched"+ blk.ID+" ) {");
}
/** Generate a static array containing the names of the tokens,
 * indexed by the token type values. This static array is used
 * to format error messages so that the token identifers or literal
 * strings are displayed instead of the token numbers.
 *
 * If a lexical rule has a paraphrase, use it rather than the
 * token label.
 *
 * @param prefix prepended to the generated array's name
 */
public void genTokenStrings(String prefix) {
    // Generate a string for each token. This creates a static
    // array of Strings indexed by token type.
    // println("");
    println("const char* " + prefix + "tokenNames[] = {");
    tabs++;

    // Walk the token vocabulary and generate a Vector of strings
    // from the tokens.
    Vector v = grammar.tokenManager.getVocabulary();
    for (int i = 0; i < v.size(); i++)
    {
        String s = (String)v.elementAt(i);
        if (s == null)
        {
            // gap in the vocabulary: emit a "<N>" placeholder
            s = "<"+ String.valueOf(i)+">";
        }
        // plain token identifiers (not string literals, not placeholders)
        // may carry a paraphrase to show in error messages instead
        if ( !s.startsWith("\"") && !s.startsWith("<") ) {
            TokenSymbol ts = (TokenSymbol)grammar.tokenManager.getTokenSymbol(s);
            if ( ts!=null && ts.getParaphrase()!=null ) {
                s = StringUtils.stripFrontBack(ts.getParaphrase(), "\"", "\"");
            }
        }
        print(charFormatter.literalString(s));
        _println(",");
    }
    // null-pointer sentinel terminates the array
    println("0");

    // Close the string array initailizer
    tabs--;
    println("};");
}
/** Generate the token types C++ file (<name>TokenTypes.hpp): an
 * include-guarded struct containing an enum with one constant per
 * token type.  String-literal tokens get their label or a mangled
 * LITERAL_xxx name; unnameable literals are emitted as comments.
 *
 * @param tm the token manager holding the vocabulary to emit
 * @throws IOException if the output file cannot be written
 */
protected void genTokenTypes(TokenManager tm) throws IOException {
    // Open the token output header file and set the currentOutput stream
    outputFile = tm.getName() + TokenTypesFileSuffix+".hpp";
    outputLine = 1;
    currentOutput = antlrTool.openOutputFile(outputFile);
    //SAS: changed for proper text file io

    tabs = 0;

    // Generate a guard wrapper
    println("#ifndef INC_"+ tm.getName()+ TokenTypesFileSuffix+"_hpp_");
    println("#define INC_"+ tm.getName()+ TokenTypesFileSuffix+"_hpp_");
    println("");

    if (nameSpace != null)
        nameSpace.emitDeclarations(currentOutput);

    // Generate the header common to all C++ files
    genHeader(outputFile);

    // Encapsulate the definitions in an interface. This can be done
    // because they are all constants.
    println("");
    println("#ifndef CUSTOM_API");
    println("# define CUSTOM_API");
    println("#endif");
    println("");
    // In the case that the .hpp is included from C source (flexLexer!)
    // we just turn things into a plain enum
    println("#ifdef __cplusplus");
    println("struct CUSTOM_API " + tm.getName() + TokenTypesFileSuffix+" {");
    println("#endif");
    tabs++;
    println("enum {");
    tabs++;

    // Generate a definition for each token type
    Vector v = tm.getVocabulary();

    // Do special tokens manually
    println("EOF_ = " + Token.EOF_TYPE + ",");

    // Move the other special token to the end, so we can solve
    // the superfluous comma problem easily
    for (int i = Token.MIN_USER_TYPE; i < v.size(); i++) {
        String s = (String)v.elementAt(i);
        if (s != null) {
            if ( s.startsWith("\"") ) {
                // a string literal
                StringLiteralSymbol sl = (StringLiteralSymbol)tm.getTokenSymbol(s);
                if ( sl==null ) {
                    antlrTool.panic("String literal "+ s+" not in symbol table");
                }
                else if ( sl.label != null ) {
                    // literal has a user-assigned label: use it as the constant name
                    println(sl.label + " = " + i + ",");
                }
                else {
                    String mangledName = mangleLiteral(s);
                    if (mangledName != null) {
                        // We were able to create a meaningful mangled token name
                        println(mangledName + " = " + i + ",");
                        // if no label specified, make the label equal to the mangled name
                        sl.label = mangledName;
                    }
                    else {
                        // unmangleable literal: record it as a comment only
                        println("// " + s + " = " + i);
                    }
                }
            }
            else if ( !s.startsWith("<") ) {
                println(s + " = " + i + ",");
            }
        }
    }

    // Moved from above -- last entry carries no trailing comma
    println("NULL_TREE_LOOKAHEAD = " + Token.NULL_TREE_LOOKAHEAD);

    // Close the enum
    tabs--;
    println("};");

    // Close the interface
    tabs--;
    println("#ifdef __cplusplus");
    println("};");
    println("#endif");

    if (nameSpace != null)
        nameSpace.emitClosures(currentOutput);

    // Generate a guard wrapper
    println("#endif /*INC_"+ tm.getName()+ TokenTypesFileSuffix+"_hpp_*/");

    // Close the tokens output file
    currentOutput.close();
    currentOutput = null;
    exitIfError();
}
/** Process a string for a simple expression for use in xx/action.g.
 * It is used to cast simple tokens/references to the right type for
 * the generated language; called for every element handed to
 * getASTCreateString(Vector).
 *
 * @param str the expression text to (possibly) wrap
 * @return str wrapped in a RefAST(...) cast when a custom AST type is
 *         in use for a parser/tree-parser grammar and str is not a
 *         defined token name; otherwise str unchanged
 */
public String processStringForASTConstructor(String str)
{
    // The cast is only relevant for parser and tree-walker grammars.
    boolean castableGrammar =
        (grammar instanceof TreeWalkerGrammar) ||
        (grammar instanceof ParserGrammar);
    // Defined token names never need the wrapper.
    boolean isDefinedToken = grammar.tokenManager.tokenDefined(str);

    if (!usingCustomAST || !castableGrammar || isDefinedToken)
    {
        // System.out.println("processStringForASTConstructor: "+str);
        return str;
    }

    // System.out.println("processStringForASTConstructor: "+str+" with cast");
    return namespaceAntlr + "RefAST(" + str + ")";
}
/** Get a string for an expression to generate creation of an AST subtree.
 * Produces e.g. "Type(astFactory->make((new antlr::ASTArray(n))->add(a)->add(b)))".
 *
 * @param v A Vector of String, where each element is an expression
 *          in the target language yielding an AST node.
 * @return the C++ construction expression, or "" for an empty vector
 */
public String getASTCreateString(Vector v) {
    int count = v.size();
    if (count == 0) {
        return "";
    }
    // the labeledElementASTType here can probably be a cast or nothing
    // in the case of ! usingCustomAST
    StringBuffer expr = new StringBuffer();
    expr.append(labeledElementASTType)
        .append("(astFactory->make((new ")
        .append(namespaceAntlr)
        .append("ASTArray(")
        .append(count)
        .append("))");
    // chain one ->add(...) per child expression
    for (int idx = 0; idx < count; idx++) {
        expr.append("->add(").append(v.elementAt(idx)).append(")");
    }
    expr.append("))");
    return expr.toString();
}
/** Get a string for an expression to generate creating of an AST node.
 * When the atom carries a heterogeneous AST node type, record that type
 * in astTypes (keyed by token type) so initializeFactory can register
 * the right factory; conflicting redefinitions keep the first type and
 * emit warnings.
 *
 * @param atom the grammar atom being created (may be null)
 * @param str  The arguments to the AST constructor
 * @return the C++ "astFactory->create(...)" expression
 */
public String getASTCreateString(GrammarAtom atom, String str) {
    if ( atom!=null && atom.getASTNodeType() != null ) {
        // this atom is using a heterogeneous AST type. (and maybe a local
        // override we can't see at the TokenManager level)
        // make note of the factory needed to generate it..
        // later this is inserted into the initializeFactory method.
        astTypes.ensureCapacity(atom.getType());
        String type = (String)astTypes.elementAt(atom.getType());
        if( type == null )
            astTypes.setElementAt(atom.getASTNodeType(), atom.getType());
        else
        {
            // give a warning over action taken if the types are unequal
            if( !atom.getASTNodeType().equals(type) )
            {
                antlrTool.warning("Attempt to redefine AST type for "+ atom.getText(), grammar.getFilename(), atom.getLine(), atom.getColumn());
                antlrTool.warning(" from \""+ type+"\" to \""+ atom.getASTNodeType()+"\" sticking to \""+ type+"\"", grammar.getFilename(), atom.getLine(), atom.getColumn());
            }
            else
                astTypes.setElementAt(atom.getASTNodeType(), atom.getType());
        }
        // after above init the factory knows what to generate...
        return "astFactory->create("+ str+")";
    }
    else
    {
        // FIXME: This is *SO* ugly! but it will have to do for now...
        // 2.7.2 will have better I hope
        // this is due to the usage of getASTCreateString from inside
        // actions/cpp/action.g
        //
        // Heuristic: "TOKEN, args" means a constructor-style call whose
        // first argument is a defined token -- no RefAST wrapper then.
        boolean is_constructor = false;
        if( str.indexOf(',') != -1 )
            is_constructor = grammar.tokenManager.tokenDefined(str.substring(0, str.indexOf(',')));

        // System.out.println("getAstCreateString(as): "+str+" "+grammar.tokenManager.tokenDefined(str));
        if( usingCustomAST &&
            (grammar instanceof TreeWalkerGrammar) &&
            !(grammar.tokenManager.tokenDefined(str) ) &&
            ! is_constructor )
            return "astFactory->create("+ namespaceAntlr+"RefAST("+ str+"))";
        else
            return "astFactory->create("+ str+")";
    }
}
/** Get a string for an expression to generate creating of an AST node.
 *
 * @param str The arguments to the AST constructor
 * @return the C++ creation expression; with a custom AST type the
 *         factory result is cast and the argument wrapped in RefAST
 */
public String getASTCreateString(String str) {
    // System.out.println("getAstCreateString(str): "+str+" "+grammar.tokenManager.tokenDefined(str));
    return usingCustomAST
        ? labeledElementASTType + "(astFactory->create(" + namespaceAntlr + "RefAST(" + str + ")))"
        : "astFactory->create(" + str + ")";
}
/** Build the lookahead test for depths 1..k as a '&&'-joined series of
 * parenthesized per-depth terms, e.g. "(termA) && (termB)".
 *
 * @param look per-depth lookahead sets (index 1..k used)
 * @param k    the lookahead depth to test up to
 * @return the combined test expression
 */
protected String getLookaheadTestExpression(Lookahead[] look, int k) {
    StringBuffer expr = new StringBuffer(100);
    expr.append("(");
    for (int depth = 1; depth <= k; depth++) {
        if (depth > 1) {
            expr.append(") && (");
        }
        // Syn preds can yield <end-of-syn-pred> (epsilon) lookahead.
        // There is no way to predict what that token would be. Just
        // allow anything instead.
        if (look[depth].containsEpsilon()) {
            expr.append("true");
        } else {
            expr.append(getLookaheadTestTerm(depth, look[depth].fset));
        }
    }
    expr.append(")");
    return expr.toString();
}
/** Generate a lookahead test expression for an alternate. This
 * will be a series of tests joined by '&&' and enclosed by '()',
 * the number of such tests being determined by the depth of the lookahead.
 *
 * @param alt      the alternative whose cached lookahead is tested
 * @param maxDepth maximum depth to consider; 0 yields "true"
 */
protected String getLookaheadTestExpression(Alternative alt, int maxDepth) {
    // empty lookahead can result from alt with sem pred
    // that can see end of token. E.g., A : {pred}? ('a')? ;
    if (maxDepth == 0) {
        return "true";
    }

    int depth = alt.lookaheadDepth;
    if (depth == GrammarAnalyzer.NONDETERMINISTIC) {
        // if the decision is nondeterministic, do the best we can: LL(k)
        // any predicates that are around will be generated later.
        depth = grammar.maxk;
    }

    // delegate the per-depth term construction to the array overload
    return "(" + getLookaheadTestExpression(alt.cache, depth) + ")";
}
/**Generate a depth==1 lookahead test expression given the BitSet.
 * This may be one of:
 * 1) a series of 'x==X||' tests
 * 2) a range test using >= && <= where possible,
 * 3) a bitset membership test for complex comparisons
 * @param k The lookahead level
 * @param p The lookahead set for level k
 */
protected String getLookaheadTestTerm(int k, BitSet p) {
    // Determine the name of the item to be compared
    String lookahead = lookaheadString(k);

    // Contiguous sets compile to a single >= / <= range check.
    int[] members = p.toArray();
    if (elementsAreRange(members)) {
        return getRangeExpression(k, members);
    }

    int degree = p.degree();
    if (degree == 0) {
        return "true";
    }

    // Big sets are cheaper as a generated-bitset membership test.
    if (degree >= bitsetTestThreshold) {
        int bitsetIdx = markBitsetForGen(p);
        return getBitsetName(bitsetIdx) + ".member(" + lookahead + ")";
    }

    // Otherwise, generate the long-winded series of "x==X||" tests.
    StringBuffer test = new StringBuffer();
    for (int idx = 0; idx < members.length; idx++) {
        if (idx > 0) {
            test.append(" || ");
        }
        // compared-to item is a token or character value
        test.append(lookahead).append(" == ").append(getValueString(members[idx]));
    }
    return test.toString();
}
/** Return an expression for testing a contiguous range of elements.
 * @param k The lookahead level
 * @param elems The elements representing the set, usually from BitSet.toArray().
 * @return String containing test expression of the form
 *         "(LA(k) >= lo && LA(k) <= hi)".
 */
public String getRangeExpression(int k, int[] elems) {
    // Callers are expected to have verified contiguity already.
    if (!elementsAreRange(elems)) {
        antlrTool.panic("getRangeExpression called with non-range");
    }
    String la = lookaheadString(k);
    String lower = getValueString(elems[0]);
    String upper = getValueString(elems[elems.length - 1]);
    return "(" + la + " >= " + lower + " && " + la + " <= " + upper + ")";
}
/** getValueString: get a string representation of a token or char value
 * suitable for the generated C++: a character literal for lexers, and
 * for parsers/tree-walkers the token's identifier, its string-literal
 * label or mangled name, or the bare type number as a last resort.
 * @param value The token or char value
 */
private String getValueString(int value) {
    String cs;
    if ( grammar instanceof LexerGrammar ) {
        // lexers compare against character literals
        cs = charFormatter.literalChar(value);
    }
    else
    {
        TokenSymbol ts = grammar.tokenManager.getTokenSymbolAt(value);
        if ( ts == null ) {
            return ""+ value; // return token type as string
            // tool.panic("vocabulary for token type " + value + " is null");
        }
        String tId = ts.getId();
        if ( ts instanceof StringLiteralSymbol ) {
            // if string literal, use predefined label if any
            // if no predefined, try to mangle into LITERAL_xxx.
            // if can't mangle, use int value as last resort
            StringLiteralSymbol sl = (StringLiteralSymbol)ts;
            String label = sl.getLabel();
            if ( label!=null ) {
                cs = label;
            }
            else {
                cs = mangleLiteral(tId);
                if (cs == null) {
                    cs = String.valueOf(value);
                }
            }
        }
        else {
            // plain token identifier; EOF maps to the library constant
            if ( tId.equals("EOF") )
                cs = namespaceAntlr+"Token::EOF_TYPE";
            else
                cs = tId;
        }
    }
    return cs;
}
/**Is the lookahead for this alt empty?
 * True when every cached lookahead set up to min(depth, maxDepth)
 * has no members.
 */
protected boolean lookaheadIsEmpty(Alternative alt, int maxDepth) {
    int depth = alt.lookaheadDepth;
    if (depth == GrammarAnalyzer.NONDETERMINISTIC) {
        // nondeterministic decisions fall back to the grammar's max k
        depth = grammar.maxk;
    }
    int limit = Math.min(depth, maxDepth);
    for (int level = 1; level <= limit; level++) {
        if (alt.cache[level].fset.degree() != 0) {
            return false;
        }
    }
    return true;
}
/** Return the C++ expression used to read the k-th lookahead item:
 * tree walkers test the current node's type, parsers/lexers use LA(k).
 */
private String lookaheadString(int k) {
    return (grammar instanceof TreeWalkerGrammar)
        ? "_t->getType()"
        : "LA(" + k + ")";
}
/** Mangle a string literal into a meaningful token name. This is
 * only possible for literals that are all characters. The resulting
 * mangled literal name is literalsPrefix with the text of the literal
 * appended.
 * @param s the quoted literal text (first and last chars are the quotes)
 * @return A string representing the mangled literal, or null if not possible.
 */
private String mangleLiteral(String s) {
    // Accumulate in a StringBuffer instead of repeated String
    // concatenation, which was O(n^2) in the literal length.
    StringBuffer mangled = new StringBuffer(antlrTool.literalsPrefix);
    // skip the surrounding quote characters at index 0 and length()-1
    for (int i = 1; i < s.length() - 1; i++) {
        char c = s.charAt(i);
        // only letters and '_' can form a valid identifier; give up otherwise
        if (!Character.isLetter(c) && c != '_') {
            return null;
        }
        mangled.append(c);
    }
    String result = mangled.toString();
    if (antlrTool.upperCaseMangledLiterals) {
        result = result.toUpperCase();
    }
    return result;
}
/** Map an identifier to it's corresponding tree-node variable.
 * This is context-sensitive, depending on the rule and alternative
 * being generated
 * @param idParam The identifier name to map
 * @param transInfo when non-null, receives refRuleRoot if the id maps
 *                  to the enclosing rule's output AST root
 * @return The mapped id (which may be the same as the input), or null if the mapping is invalid due to duplicates
 */
public String mapTreeId(String idParam, ActionTransInfo transInfo) {
    // if not in an action of a rule, nothing to map.
    if ( currentRule==null ) return idParam;
    // System.out.print("mapTreeId: "+idParam+" "+currentRule.getRuleName()+" ");

    // in_var: true when the id refers to an *input* tree variable
    // (tree-walker "_in" suffix convention) rather than an output AST.
    boolean in_var = false;
    String id = idParam;
    if (grammar instanceof TreeWalkerGrammar)
    {
        // RK: hmmm this seems odd. If buildAST is false it translates
        // #rulename_in to 'rulename_in' else to 'rulename_AST_in' which indeed
        // exists. disabling for now.. and hope it doesn't blow up somewhere.
        if ( !grammar.buildAST )
        {
            in_var = true;
            // System.out.println("in_var1");
        }
        // If the id ends with "_in", then map it to the input variable
        // else
        if (id.length() > 3 && id.lastIndexOf("_in") == id.length()-3)
        {
            // Strip off the "_in"
            id = id.substring(0, id.length()-3);
            in_var = true;
            // System.out.println("in_var2");
        }
    }

    // Check the rule labels. If id is a label, then the output
    // variable is label_AST, and the input variable is plain label.
    for (int i = 0; i < currentRule.labeledElements.size(); i++)
    {
        AlternativeElement elt = (AlternativeElement)currentRule.labeledElements.elementAt(i);
        if (elt.getLabel().equals(id))
        {
            return in_var ? id : id + "_AST";
        }
    }

    // Failing that, check the id-to-variable map for the alternative.
    // If the id is in the map, then output variable is the name in the
    // map, and input variable is name_in
    String s = (String)treeVariableMap.get(id);
    if (s != null)
    {
        // NOTE: identity comparison against the NONUNIQUE sentinel object
        // is intentional here.
        if (s == NONUNIQUE)
        {
            // There is more than one element with this id
            antlrTool.error("Ambiguous reference to AST element "+ id+ " in rule "+ currentRule.getRuleName());
            return null;
        }
        else if (s.equals(currentRule.getRuleName()))
        {
            // a recursive call to the enclosing rule is
            // ambiguous with the rule itself.
            antlrTool.error("Ambiguous reference to AST element "+ id+ " in rule "+ currentRule.getRuleName());
            return null;
        }
        else
        {
            return in_var ? s + "_in" : s;
        }
    }

    // System.out.println("Last check: "+id+" == "+currentRule.getRuleName());
    // Failing that, check the rule name itself. Output variable
    // is rule_AST; input variable is rule_AST_in (treeparsers).
    if( id.equals(currentRule.getRuleName()) )
    {
        String r = in_var ? id + "_AST_in" : id + "_AST";
        if ( transInfo!=null ) {
            if ( !in_var ) {
                // record that the action references the rule's AST root
                transInfo.refRuleRoot = r;
            }
        }
        return r;
    }
    else
    {
        // id does not map to anything -- return itself.
        return id;
    }
}
/** Given an element and the name of an associated AST variable,
 * create a mapping between the element "name" and the variable name.
 * Unlabeled token refs are keyed by their token id, unlabeled rule
 * refs by their target rule name.  A second element with the same
 * name overwrites the entry with the NONUNIQUE sentinel so that
 * mapTreeId can report the ambiguity.
 * @param e The alternative element to map (tree elements defer to their root).
 * @param name The AST variable name to associate with the element.
 */
private void mapTreeVariable(AlternativeElement e, String name)
{
	// For tree elements, defer to the root
	if (e instanceof TreeElement) {
		mapTreeVariable(((TreeElement)e).root, name);
		return;
	}
	// Determine the name of the element, if any, for mapping purposes.
	// Labeled items are referenced via their label, so don't map them.
	String elName = null;
	if (e.getLabel() == null) {
		if (e instanceof TokenRefElement) {
			// use the token id
			elName = ((TokenRefElement)e).atomText;
		}
		else if (e instanceof RuleRefElement) {
			// use the rule name
			elName = ((RuleRefElement)e).targetRule;
		}
	}
	// Add the element to the tree variable map if it has a name
	if (elName != null) {
		if (treeVariableMap.get(elName) != null) {
			// Name is already in the map -- mark it as duplicate.
			// Note: put() replaces the prior mapping, so the old
			// remove()-then-put() pair was redundant.
			treeVariableMap.put(elName, NONUNIQUE);
		}
		else {
			treeVariableMap.put(elName, name);
		}
	}
}
/** Lexically process tree-specifiers in the action.
 * This will replace #id and #(...) with the appropriate
 * function calls and/or variables.
 * @param actionStr The raw action text.
 * @param line Source line of the action, for error reporting.
 * @param currentRule Rule enclosing the action.
 * @param tInfo Receives translation info (e.g. tree-root assignment).
 * @return The translated action, the original text when no translation
 *         applies or translation fails, or null for an empty action.
 */
protected String processActionForSpecialSymbols(String actionStr,
                                                int line,
                                                RuleBlock currentRule,
                                                ActionTransInfo tInfo)
{
	// Empty actions yield nothing.
	if (actionStr == null || actionStr.length() == 0)
		return null;
	// The action trans info tells us (at the moment) whether an
	// assignment was done to the rule's tree root.
	if (grammar == null)
		return actionStr;

	// Translation is needed when ASTs are built and the action mentions
	// '#', for any tree-walker action, and for lexer/parser actions
	// that mention '$'.
	boolean wantsTranslation =
		(grammar.buildAST && actionStr.indexOf('#') != -1)
		|| grammar instanceof TreeWalkerGrammar
		|| ((grammar instanceof LexerGrammar || grammar instanceof ParserGrammar)
			&& actionStr.indexOf('$') != -1);
	if (!wantsTranslation)
		return actionStr;

	// Create a lexer to read an action and return the translated version
	antlr.actions.cpp.ActionLexer lexer =
		new antlr.actions.cpp.ActionLexer(actionStr, currentRule, this, tInfo);
	lexer.setLineOffset(line);
	lexer.setFilename(grammar.getFilename());
	lexer.setTool(antlrTool);
	try {
		lexer.mACTION(true);
		actionStr = lexer.getTokenObject().getText();
	}
	catch (RecognitionException ex) {
		// Report and fall through: actionStr is still the original text.
		lexer.reportError(ex);
	}
	catch (TokenStreamException tex) {
		antlrTool.panic("Error reading action:"+actionStr);
	}
	catch (CharStreamException io) {
		antlrTool.panic("Error reading action:"+actionStr);
	}
	return actionStr;
}
/** Normalize a namespace option value: strip surrounding double
 * quotes and guarantee a trailing C++ scope operator "::" so the
 * result can be used directly as a prefix.
 * @param ns The raw option text (possibly quoted).
 * @return The namespace, "::"-terminated.
 */
private String fixNameSpaceOption(String ns)
{
	ns = StringUtils.stripFrontBack(ns, "\"", "\"");
	// Append "::" unless it is already present.  The old test
	// (substring compare guarded by length() > 2) silently skipped
	// namespaces of one or two characters, leaving them without the
	// scope operator; endsWith() is safe at any length.
	if (ns.length() > 0 && !ns.endsWith("::"))
		ns += "::";
	return ns;
}
/** Configure the grammar-class-dependent generation strings (label
 * types and initializers, common parameters/locals, exception names,
 * the LT(1)/LA(1)/_t value expression, the no-viable-alt throw) and
 * apply file- and grammar-level C++ options: namespaces, #line
 * generation and constructor suppression.  Grammar-level options
 * override the tool-level (file-level) defaults.
 * @param g The grammar being generated (parser, lexer or tree walker).
 */
private void setupGrammarParameters(Grammar g) {
	if (g instanceof ParserGrammar ||
		g instanceof LexerGrammar ||
		g instanceof TreeWalkerGrammar)
	{
		/* RK: options also have to be added to Grammar.java and for options
		 * on the file level entries have to be defined in
		 * DefineGrammarSymbols.java and passed around via 'globals' in
		 * antlrTool.java
		 */
		// Tool-level (file-level) defaults first...
		if (antlrTool.nameSpace != null)
			nameSpace = antlrTool.nameSpace;
		if (antlrTool.namespaceStd != null)
			namespaceStd = fixNameSpaceOption(antlrTool.namespaceStd);
		if (antlrTool.namespaceAntlr != null)
			namespaceAntlr = fixNameSpaceOption(antlrTool.namespaceAntlr);
		genHashLines = antlrTool.genHashLines;

		/* ...then let grammar level options override filelevel ones. */
		if (g.hasOption("namespace")) {
			Token t = g.getOption("namespace");
			if (t != null) {
				nameSpace = new NameSpace(t.getText());
			}
		}
		if (g.hasOption("namespaceAntlr")) {
			Token t = g.getOption("namespaceAntlr");
			if (t != null) {
				String ns = StringUtils.stripFrontBack(t.getText(), "\"", "\"");
				if (ns != null) {
					// Guarantee a trailing scope operator.
					if (ns.length() > 2 && !ns.endsWith("::"))
						ns += "::";
					namespaceAntlr = ns;
				}
			}
		}
		if (g.hasOption("namespaceStd")) {
			Token t = g.getOption("namespaceStd");
			if (t != null) {
				String ns = StringUtils.stripFrontBack(t.getText(), "\"", "\"");
				if (ns != null) {
					// Guarantee a trailing scope operator.
					if (ns.length() > 2 && !ns.endsWith("::"))
						ns += "::";
					namespaceStd = ns;
				}
			}
		}
		if (g.hasOption("genHashLines")) {
			Token t = g.getOption("genHashLines");
			if (t != null) {
				String val = StringUtils.stripFrontBack(t.getText(), "\"", "\"");
				genHashLines = val.equals("true");
			}
		}
		noConstructors = antlrTool.noConstructors; // get the default
		if (g.hasOption("noConstructors")) {
			Token t = g.getOption("noConstructors");
			// Guard against a null option token: the original code
			// dereferenced t.getText() unconditionally after the error
			// check and threw NullPointerException when t was null.
			if (t != null) {
				if (!(t.getText().equals("true") || t.getText().equals("false")))
					antlrTool.error("noConstructors option must be true or false",
									antlrTool.getGrammarFile(),
									t.getLine(), t.getColumn());
				noConstructors = t.getText().equals("true");
			}
		}
	}
	if (g instanceof ParserGrammar) {
		labeledElementASTType = namespaceAntlr + "RefAST";
		labeledElementASTInit = namespaceAntlr + "nullAST";
		if (g.hasOption("ASTLabelType")) {
			Token tsuffix = g.getOption("ASTLabelType");
			if (tsuffix != null) {
				String suffix = StringUtils.stripFrontBack(tsuffix.getText(), "\"", "\"");
				if (suffix != null) {
					usingCustomAST = true;
					labeledElementASTType = suffix;
					labeledElementASTInit = suffix + "(" + namespaceAntlr + "nullAST)";
				}
			}
		}
		// Note: trailing blank in "RefToken " is deliberate -- the type
		// is emitted directly in front of the variable name.
		labeledElementType = namespaceAntlr + "RefToken ";
		labeledElementInit = namespaceAntlr + "nullToken";
		commonExtraArgs = "";
		commonExtraParams = "";
		commonLocalVars = "";
		lt1Value = "LT(1)";
		exceptionThrown = namespaceAntlr + "RecognitionException";
		throwNoViable = "throw " + namespaceAntlr + "NoViableAltException(LT(1), getFilename());";
	}
	else if (g instanceof LexerGrammar) {
		labeledElementType = "char ";
		labeledElementInit = "'\\0'";
		commonExtraArgs = "";
		commonExtraParams = "bool _createToken";
		commonLocalVars = "int _ttype; " + namespaceAntlr + "RefToken _token; " +
			namespaceStd + "string::size_type _begin = text.length();";
		lt1Value = "LA(1)";
		exceptionThrown = namespaceAntlr + "RecognitionException";
		throwNoViable = "throw " + namespaceAntlr + "NoViableAltForCharException(LA(1), getFilename(), getLine(), getColumn());";
	}
	else if (g instanceof TreeWalkerGrammar) {
		labeledElementInit = namespaceAntlr + "nullAST";
		labeledElementASTInit = namespaceAntlr + "nullAST";
		labeledElementASTType = namespaceAntlr + "RefAST";
		labeledElementType = namespaceAntlr + "RefAST";
		commonExtraParams = namespaceAntlr + "RefAST _t";
		throwNoViable = "throw " + namespaceAntlr + "NoViableAltException(_t);";
		lt1Value = "_t";
		if (g.hasOption("ASTLabelType")) {
			Token tsuffix = g.getOption("ASTLabelType");
			if (tsuffix != null) {
				String suffix = StringUtils.stripFrontBack(tsuffix.getText(), "\"", "\"");
				if (suffix != null) {
					usingCustomAST = true;
					labeledElementASTType = suffix;
					labeledElementType = suffix;
					labeledElementInit = suffix + "(" + namespaceAntlr + "nullAST)";
					labeledElementASTInit = labeledElementInit;
					commonExtraParams = suffix + " _t";
					throwNoViable = "throw " + namespaceAntlr + "NoViableAltException(" +
						namespaceAntlr + "RefAST(_t));";
					lt1Value = "_t";
				}
			}
		}
		if (!g.hasOption("ASTLabelType")) {
			// Default the option so downstream generation always sees one.
			g.setOption("ASTLabelType",
						new Token(ANTLRTokenTypes.STRING_LITERAL, namespaceAntlr + "RefAST"));
		}
		commonExtraArgs = "_t";
		commonLocalVars = "";
		exceptionThrown = namespaceAntlr + "RecognitionException";
	}
	else {
		antlrTool.panic("Unknown grammar type");
	}
}
}