Xtext : identify when a optional must be call

Xtext : identify when a optional must be call - modeling

I am using xtext to define a grammar.
I have a problem with the runtime syntax evaluation.
the rule is signatureDeclaration. Here the full grammar:
// automatically generated by Xtext
grammar org.xtext.alloy.Alloy with org.eclipse.xtext.common.Terminals
import "http://fr.cuauh.als/1.0"
import "http://www.eclipse.org/emf/2002/Ecore" as ecore
//specification ::= [module] open* paragraph*
//ok
Specification returns Specification:
{Specification}
(module=Module)?
(opens+=Library (opens+=Library)*)?
(paragraphs+=Paragraph (paragraphs+=Paragraph)*)?;
//module ::= "module" name [ "[" ["exactly"] name ("," ["exactly"] num)* "]" ]
//module ::= "module" name? [ "[" ["exactly"] name ("," ExactlyNum )* "]" ]
//ok
Module returns Module:
{Module}
'module' (name=IDName)? ('['(exactly?='exactly')? extensionName=[IDref] (nums+=ExactlyNums ( "," nums+=ExactlyNums)*)?']')?
;
IDName returns IDName:
name=ID
;
terminal ID : '^'?('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'_'|'0'..'9'|'/')*;
//open ::= ["private"] "open" name [ "[" ref,+ "]" ] [ "as" name ]
//open ::= ["private"] "open" path [ "[" ref,+ "]" ] [ "as" name ]
//ok
Library returns Library:
{Library}
(private?='private')? 'open' path=EString ('['references+=Reference (',' references+=Reference)*']')? ('as' alias=Alias)?
;
//a path
//ok
//terminal PATH returns ecore::EString :
// ('a'..'z'|'A'..'Z'|'_'|'.')+('/'('a'..'z'|'A'..'Z'|'_'|'.')+)*
//;
//paragraph ::= factDecl | assertDecl | funDecl | cmdDecl | enumDecl | sigDecl
//paragraph ::= factDecl | assertDecl | funDecl | predDecl | cmdDecl | enumDecl | sigDecl
//ok
Paragraph returns Paragraph:
FactDeclaration | AssertDeclaration | FunctionDeclaration | PredicatDeclaration | CmdDeclaration | EnumerationDeclaration | SignatureDeclaration;
//cmdDecl ::= [name ":"] ("run"|"check") (name|block) scope
//cmdDecl ::= [name ":"] command (ref|block) scope ["expect (0|1)"]
//ok
CmdDeclaration returns CmdDeclaration:
(name=IDName ':')? operation=cmdOp referenceOrBlock=ReferenceOrBlock (scope=Scope)? (expect?='expect' expectValue=EInt)?;
//ok
ReferenceOrBlock returns ReferenceOrBlock:
BlockExpr | ReferenceName;
//sigDecl ::= sigQual* "sig" name,+ [sigExt] "{" decl,* "}" [block]
//sigDecl ::= ["private"] ["abstract"] [quant] "sig" name [sigExt] "{" relDecl,* "}" [block]
//ok
SignatureDeclaration returns SignatureDeclaration:
{SignatureDeclaration}
(isPrivate?='private')? (isAbstract?='abstract')? (quantifier=SignatureQuantifier)? 'sig' name=IDName (extension=SignatureExtension)? '{'
(relations+=RelationDeclaration ( ',' =>relations+=RelationDeclaration)* )?
'}'
(block=Block)?;
//ok
SignatureExtension returns SignatureExtension:
SignatureinInheritance | SignatureInclusion;
TypeScopeTarget returns TypeScopeTarget:
Int0 | Seq | ReferenceName;
//name ::= ("this" | ID) ["/" ID]*
//ok do not need to be part of the concrete syntax
//Name returns Name:
// thisOrId=ThisOrID ('/'ids+=ID0( "/" ids+=ID0)*)?;
//ok do not need to be part of the concrete syntax
//ThisOrID returns ThisOrID:
// ID0 | This;
EBoolean returns ecore::EBoolean:
'true' | 'false';
//["exactly"] num
//ok
ExactlyNums returns ExactlyNums:
exactly?='exactly' num=Number;
//ok do not need to be part of the concrete syntax
// IDName | IDref;
IDref returns IDref:
namedElement=[IDName]
;
This returns This:
{This}
'this'
;
EString returns ecore::EString:
STRING | ID;
//ok
EInt returns ecore::EInt:
'-'? INT;
Alias returns Alias:
{Alias}
name=IDName;
//factDecl ::= "fact" [name] block
//ok
FactDeclaration returns FactDeclaration:
'fact' (name=IDName)? block=Block;
//assertDecl ::= "assert" [name] block
//ok
AssertDeclaration returns AssertDeclaration:
'assert' (name=IDName)? block=Block ;
//funDecl ::= ["private"] "fun" [ref "."] name "(" decl,* ")" ":" expr block
//funDecl ::= ["private"] "fun" [ref "."] name "[" decl,* "]" ":" expr block
//funDecl ::= ["private"] "fun" [ref "."] name ":" expr block
//
//funDecl ::= ["private"] "fun" [ref "."] name ["(" paramDecl,* ")" | "[" paramDecl,* "]" ] ":" expr block
//ok
FunctionDeclaration returns FunctionDeclaration:
(private?='private')? 'fun' (reference=[Reference] ".")? name=IDName
('(' (parameters+=ParameterDeclaration ( "," parameters+=ParameterDeclaration)*)? ')'|
'[' (parameters+=ParameterDeclaration ( "," parameters+=ParameterDeclaration)*)? ']')?
':' ^returns=Expression
block=Block;
//funDecl ::= ["private"] "pred" [ref "."] name "(" decl,* ")" block
//funDecl ::= ["private"] "pred" [ref "."] name "[" decl,* "]" block
//funDecl ::= ["private"] "pred" [ref "."] name block
//
//predDecl ::= ["private"] "pred" [ref "."] name ["(" paramDecl,* ")" | "[" paramDecl,* "]" ] ":" block
//ok
PredicatDeclaration returns PredicatDeclaration:
(private?='private')? 'pred' (reference=[Reference|EString] ".")? name=IDName
('(' (parameters+=ParameterDeclaration ( "," parameters+=ParameterDeclaration)*)? ')'|
'[' (parameters+=ParameterDeclaration ( "," parameters+=ParameterDeclaration)*)? ']')?
block=Block;
//enumDecl ::= "enum" name "{" name ("," name)* "}"
//enumDecl ::= "enum" name "{" enumEl ("," enumEl)* "}"
//ok
EnumerationDeclaration returns EnumerationDeclaration:
'enum' name=IDName '{' enumeration+=EnumerationElement ( "," enumeration+=EnumerationElement)* '}';
//ok
EnumerationElement returns EnumerationElement:
{EnumerationElement}
name=IDName;
//"lone" | "one" | "some"
//ok
enum SignatureQuantifier returns SignatureQuantifier:
lone = 'lone' | one = 'one' | some = 'some';
//decl ::= ["private"] ["disj"] name,+ ":" ["disj"] expr
//decl ::= ["private"] ["disj"] name,+ ":" ["disj"] expr
//ok
RelationDeclaration returns RelationDeclaration:
(isPrivate?='private')? (varsAreDisjoint?='disj')? names+=VarDecl (',' names+=VarDecl)* ':' (expressionIsDisjoint?='disj')? expression=Expression;
//sigExt ::= "extends" ref
//ok
SignatureinInheritance returns SignatureinInheritance:
'extends' extends=Reference;
//sigExt ::= "in" ref ["+" ref]*
//ok
SignatureInclusion returns SignatureInclusion:
'in' includes+=Reference ( "+" includes+=Reference)* ;
//ok
VarDecl returns VarDecl:
{VarDecl}
name=IDName;
//decl ::= ["private"] ["disj"] name,+ ":" ["disj"] expr
//ok
ParameterDeclaration returns ParameterDeclaration:
(isPrivate?='private')? (varsAreDisjoint?='disj')? names+=VarDecl ( "," names+=VarDecl)* ':' (expressionIsDisjoint?='disj')? expression=Expression;
//("run"|"check")
//ok
enum cmdOp returns cmdOp:
run = 'run' | check = 'check';
//expr ::=
//1) "let" letDecl,+ blockOrBar
//2) | quant decl,+ blockOrBar
//3) | unOp expr
//4) | expr binOp expr
//5) | expr arrowOp expr
//6) | expr ["!"|"not"] compareOp expr
//7) | expr ("=>"|"implies") expr "else" expr
//8) | expr "[" expr,* "]"
//9) | number
//10) | "-" number
//11) | "none"
//12) | "iden"
//13) | "univ"
//14) | "Int"
//15) | "seq/Int"
//16) | "(" expr ")"
//17) | ["#"] Name
//18) | block
//19) | "{" decl,+ blockOrBar "}"
// expr ::= leftPart [rightPart]
Expression returns Expression:
lhs=NonLeftRecursiveExpression (=>parts=NaryPart)?;
//4) | expr binOp expr
//5) | expr arrowOp expr
//6) | expr ["!"|"not"] compareOp expr
//7) | expr ("=>"|"implies") expr "else" expr
//8) | expr "[" expr,* "]"
//ok
NaryPart returns NaryPart:
BinaryOrElsePart | CallPart;
//4) | expr binOp expr
//5) | expr arrowOp expr
//6) | expr ["!"|"not"] compareOp expr
//7) | expr ("=>"|"implies") expr "else" expr
//
//7) | expr ("=>"|"implies") expr "else" expr
//4)5)6) | expr binaryOperator expr*
//ok
BinaryOrElsePart returns BinaryOrElsePart:
=>('=>'|'implies') rhs=Expression (=>'else' else=Expression)? |
operator=BinaryOperator rhs=Expression ;
//8) | expr "[" expr,* "]"
//it is just the right part
//ok
CallPart returns CallPart:
{CallPart}
'['(params+=Expression ( "," params+=Expression)*)?']';
//1) "let" letDecl,+ blockOrBar
//2) | quant decl,+ blockOrBar
//19) | "{" decl,+ blockOrBar "}"
//18) | block
// | terminalExpression
NonLeftRecursiveExpression returns NonLeftRecursiveExpression:
LetExpression | QuantifiedExpression | CurlyBracketsExpression | BlockExpr | TerminalExpression;
//1) "let" letDecl,+ blockOrBar
//ok
LetExpression returns LetExpression:
'let' letDeclarations+=LetDeclaration ( "," letDeclarations+=LetDeclaration)* blockOrBar=BlockOrBar;
//2) | quant decl,+ blockOrBar
//ok
QuantifiedExpression returns QuantifiedExpression:
quantifier=QuantifiedExpressionQuantifier varDeclaration+=VarDeclaration ( "," varDeclaration+=VarDeclaration)* blockOrBar=BlockOrBar;
//
//binOp ::= "||" | "or" | "&&" | "and" | "&" | "<=>" | "iff" | "=>" | "implies" | "+" | "-" | "++" | "<:" | ":>" | "." | "<<" | ">>" | ">>>"
//compareOp ::= "=" | "in" | "<" | ">" | "=<" | ">="
//arrowOp ::= ["some"|"one"|"lone"|"set"] "->" ["some"|"one"|"lone"|"set"]
//ok
BinaryOperator returns BinaryOperator:
RelationalOperator | CompareOperator | ArrowOperator;
//ok
RelationalOperator returns RelationalOperator:
operator=RelationalOp;
//binOp ::= "||" | "or" | "&&" | "and" | "&" | "<=>" | "iff" | "=>" | "implies" | "+" | "-" | "++" | "<:" | ":>" | "." | "<<" | ">>" | ">>>"
//ok
enum RelationalOp returns RelationalOp:
or = '||' | and = '&&' | union = '+' | intersection = '&' | difference = '-' | equivalence = '<=>' | override = '++'
| domain = '<:' | range = ':>' | join = '.' ; // | lshift = '<<' | rshift = '>>' | rrshift = '>>>';
//["!"|"not"] compareOp
//ok
CompareOperator returns CompareOperator:
(negated?='!' | negated?='not')? operator=CompareOp;
//compareOp ::= "=" | "in" | "<" | ">" | "=<" | ">="
enum CompareOp returns CompareOp:
equal = '=' | inclusion = 'in' | lesser = '<' | greater = '>' | lesserOrEq = '<=' | greaterOrEq = '>=';
//arrowOp ::= ["some"|"one"|"lone"|"set"] "->" ["some"|"one"|"lone"|"set"]
//ok
ArrowOperator returns ArrowOperator:
{ArrowOperator}
(leftQuantifier=ArrowQuantifier)? '->' (=>rightQuantifier=ArrowQuantifier)?;
//"some"|"one"|"lone"|"set"
//ok
enum ArrowQuantifier returns ArrowQuantifier:
lone = 'lone' | one = 'one' | some = 'some' | set = 'set' ;
//19) | "{" decl,+ blockOrBar "}"
//ok
CurlyBracketsExpression returns CurlyBracketsExpression:
'{' varDeclarations+=VarDeclaration ( "," varDeclarations+=VarDeclaration)* blockOrBar=BlockOrBar '}';
//blockOrBar ::= block
//blockOrBar ::= "|" expr
//ok
BlockOrBar returns BlockOrBar:
BlockExpr | Bar;
//blockOrBar ::= "|" expr
//ok
Bar returns Bar:
'|' expression=Expression;
//block ::= "{" expr* "}"
//ok
BlockExpr returns BlockExpr:
{BlockExpr}
'{' (expressions+=Expression ( "," expressions+=Expression)*)?'}';
//3) unOp expr
// | finalExpression
TerminalExpression returns TerminalExpression:
UnaryExpr | finalExpression ;
//3) unOp expr
//ok
UnaryExpr returns UnaryExpr:
unOp=UnaryOperator expression=TerminalExpression;
//unOp ::= "!" | "not" | "no" | "some" | "lone" | "one" | "set" | "seq" | "#" | "~" | "*" | "^"
//unOp ::= "!" | "not" |"#" | "~" | "*" | "^"
//ok
enum UnaryOperator returns UnaryOperator:
not = 'not' | card = '#' | transpose = '~' | reflexiveClosure = '*' | closure = '^' | not2 = '!';
//16) | "(" expr ")"
//9) | number
//10) | "-" number
//17) | ["#"] Name
//11) | "none"
//12) | "iden"
//13) | "univ"
//14) | "Int"
//15) | "seq/Int"
//16) | "(" expr ")"
//10) | ["-"] number
//17) | "#" Name
//17) | reference
//12)13) | constante
finalExpression returns TerminalExpression:
BracketExpression | NumberExpression | NotExpandedExpression | ReferenceExpression | ConstantExpression;
//16) | "(" expr ")"
//ok
BracketExpression returns BracketExpression:
'('expression=Expression')';
//9) | number
//10) | "-" number
//ok
Number returns Number:
NumberExpression;
//ok
NumberExpression returns NumberExpression:
value=EInt;
//17) | ["#"] Name
//17) | "#" Name
//ok
NotExpandedExpression returns NotExpandedExpression:
'#' name=[IDref];
//ok
ReferenceExpression returns ReferenceExpression:
reference=Reference;
//ref ::= name | "univ" | "Int" | "seq/Int"
//ok
Reference returns Reference:
ReferenceName | ConstanteReference;
//ok
ConstanteReference returns ConstanteReference:
cst=constanteRef;
//13) | "univ"
//14) | "Int"
//15) | "seq/Int"
//ok
enum constanteRef returns constanteRef:
int = 'Int' | seqint = 'seq/Int' | univ = 'univ';
//ok
ReferenceName returns ReferenceName:
name=IDref;
//ok
ConstantExpression returns ConstantExpression:
constante=Constant;
//11) | "none"
//12) | "iden"
//ok
enum Constant returns Constant:
none = 'none' | iden = 'iden';
//ok
Block returns Block:
BlockExpr;
//letDecl ::= name "=" expr
//ok
LetDeclaration returns LetDeclaration:
varName=VarDecl '=' expression=Expression;
//quant ::= "all" | "no" | "some" | "lone" | "one" | "sum"
//quant ::= "all" | "no" | "some" | "lone" | "one" | "null"
//ok
enum QuantifiedExpressionQuantifier returns QuantifiedExpressionQuantifier:
no = 'no' | one = 'one' | lone = 'lone' | some = 'some' | all = 'all' | null = 'null';
//decl ::= ["private"] ["disj"] name,+ ":" ["disj"] expr
//ok
VarDeclaration returns VarDeclaration:
(isPrivate?='private')? (varsAreDisjoint?='disj')? names+=VarDecl ( "," names+=VarDecl)* ':' (expressionIsDisjoint?='disj')? expression=Expression;
//scope ::= "for" number ["expect" (0|1)]
//scope ::= "for" number "but" typescope,+ ["expect" (0|1)]
//scope ::= "for" typescope,+ ["expect" (0|1)]
//scope ::= ["expect" (0|1)]
//
//scope ::= "for" [number] ["but"] typescope,*
//ok
Scope returns Scope:
{Scope}
'for' (number=Number)? (but?='but')? (typeScope+=TypeScope ( "," typeScope+=TypeScope)*)?;
//typescope ::= ["exactly"] number [name|"int"|"seq"]
//typescope ::= ExactlyNumber target
TypeScope returns TypeScope:
num=ExactlyNums target=[TypeScopeTarget];
//[name|"int"|"seq"]
//[Validname|"int"|"seq"]
//ok
TypeScopeTarget_Impl returns TypeScopeTarget:
{TypeScopeTarget}
;
Int0 returns Int:
{Int}
'Int'
;
Seq returns Seq:
{Seq}
'Seq'
;
But at runtime of the editor, I have the following error on the little exemple :
sig A {}
sig B {
a : A
}
Multiple markers at this line (line a : A)
- no viable alternative at input 'A'
- missing EOF at '->'
- missing '}' at 'a'
the rule works for the first one, but not the second one. It is at is expect there is no relationDeclartion between the brackets.
I think it is due to the declaration of the rule relationDeclaration call with the form :
(relations+=RelationDeclaration ( ',' relations+=RelationDeclaration)*)?
I cannot get what is wrong.
What did I miss?
What can I do to make it work?
Thanks in advance.

in your grammar for RelationDeclaration it seems you miss some optional marking questionmarks (isPrivate?='private')? (varsAreDisjoint?='disj')? (the same problem is all over the grammar)
?= declares the attribute as optional, but only a ? around the rule call makes it actually optional

Related

ANTLR G4 grammar giving error in input validation

I have a grammer which I have created to validate below input and convert it to entity.
Input:
(
[LANGUAGE] IN ("Arabic", "Dutch")
AND
[Content Series] IN ("The Walking Dead")
AND
[PUBLISHER_NAME] IN ("Yahoo Search", "Yahoo! NAR")
)
OR
(
[LANGUAGE] IN ("English")
AND
[PUBLISHER_NAME] IN ("Aol News", "Microsoft-Bing!")
)
Exp.g4
grammar Exp;
options {
language = Java;
}
start
: expr EOF
;
expr
: NOT expr
| '(' expr ')' expr
| expr ENTITY expr
| expr AND expr
| expr OR expr
| entity
| list
| VALUE
;
entity
: KEY comp VALUE
| KEY list_op list
| KEY exists_op
;
list
: '(' (VALUE (',' VALUE)*)? ')'
;
list_op
: BETWEEN
| NOT_BETWEEN
| IN
| NOT_IN
| CONTAINS
| NOT_CONTAINS
;
exists_op
: EXISTS
| NOT_EXISTS
;
comp
: EQ
| NEQ
| GT
| LET
| GTE
| LETE
;
VALUE : '"' .*? '"';
KEY : '[' .*? ']';
OR : 'OR';
AND : 'AND';
NOT : 'NOT';
ENTITY : 'ENTITY';
NOT_CONTAINS : 'NOT_CONTAINS';
CONTAINS : 'CONTAINS';
NOT_IN : 'NOT_IN';
IN : 'IN';
NOT_BETWEEN : 'NOT_BETWEEN';
BETWEEN : 'BETWEEN';
NOT_EXISTS : 'NOT_EXISTS';
EXISTS : 'EXISTS';
LETE : '<=';
GTE : '>=';
LET : '<';
GT : '>';
NEQ : '!=';
EQ : '=';
WS : [\t\r\n ]+ -> channel(HIDDEN);
Though this grammer is giving me below error:-
line 1:0 extraneous input '(' expecting KEY
(entity ( [LANGUAGE] (list_op IN) (list ( "Arabic" , "Dutch" )))
I have tried changing grammar but no help.

Change '(' expr ')' expr into '(' expr ')'

How can I modify my EBNF to handle cases like `- not 12`, `not + -1`

I created EBNF for expressions below
<expression> ::= <or_operand> [ "or" <or_operand> ]
<or_operand> ::= <and_operand> [ "and" <and_operand> ]
<and_operand> ::= <equality_operand> [ ( "=" | "!=" ) <equality_operand> ]
<equality_operand> ::= <simple_expression> [ <relational_operator> <simple_expression> ]
<relational_op> ::= "<" | ">" | "<=" | ">="
<simple_expression> ::= <term> [ ( "+" | "-" ) <term> ]
<term> ::= <factor> [ ( "*" | "/" ) <factor> ]
<factor> ::= <literal>
| "(" <expression> ")"
| "not" <factor>
| ( "+" | "-" ) <factor>
<literal> ::= <boolean_literal> | <number>
<boolean_literal> ::= "true" | "false"
<number> ::= <digit> [ <digit> ]
<digit> ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
My problem lies within the factor section
<factor> ::= <literal>
| "(" <expression> ")"
| "not" <factor>
| ( "+" | "-" ) <factor>
You can see that I included three unary operators not, -, and +. They work for specific types like not only applies to boolean values only and +/- applies to numbers only.
I don't know how to handle cases when not is mixed with +/- like not +7, - not true, etc. for example. Is there any way I can modify the grammar so not can never be mixed with +/-?
Will it suffice?
<factor> ::= <literal>
| "(" <expression> ")"
| ( "not" | ( "+" | "-" ) ) <factor>
Or maybe it's parser's job to solve this issue?

This is quite easy to solve. You have two different styles of expressions each with their own syntax, so you just do not mix them, and keep their syntax rules separated.
A boolean expression can only occur in certain places, such as an assignment or some kind of choice statement. A numerical expression can only occur in a comparison or an assignment. This is not something that is handled at the semantic level, and if one looks at the grammar for many languages, this is how it is solved.
So you have for numeric expressions:
<simple_expression> ::= <term> [ ( "+" | "-" ) <term> ]
<term> ::= <factor> [ ( "*" | "/" ) <factor> ]
<factor> ::= <number>
| "(" <expression> ")"
| ( "+" | "-" ) <factor>
<number> ::= <digit> [ <digit> ]
<digit> ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
This is now self contained. We can now build this into boolean expressions:
<boolean_expression> ::= "not" <boolean_expression>
| <logical_expression>
<logical_expression> ::= <or_operand> [ "or" <or_operand> ]
<or_operand> ::= <and_operand> [ "and" <and_operand> ]
<and_operand> ::= <equality_operand> [ ( "=" | "!=" ) <equality_operand> ]
<equality_operand> ::= <simple_expression> [ <relational_operator> <simple_expression> ]
| <boolean_literal>
<relational_op> ::= "<" | ">" | "<=" | ">="
<boolean_literal> ::= "true" | "false"
Notice that I permitted the equality comparison of boolean literals, however if you did not want to permit this you could change the rules to only permit them for an and operand.
Now we can use these in another rule, such as assignment:
<assignment> ::= <variable> ":=" ( <simple_expression> | <boolean_expression> )
All is done.

int('1.5') VS float('1.5') in python

Why does float('1.5') gives 1.5 as output as expected but int('1.5') gives a value error?
Shouldn't python automatically convert the string into float and then into integer.

Because 1.5 isn't a valid integer literal which is required by the int() function.
From the docs:
If x is not a number or if base is given, then x must be a string,
bytes, or bytearray instance representing an integer literal in radix
base.
Whereas integer literals are defined as follows:
integer ::= decinteger | bininteger | octinteger | hexinteger
decinteger ::= nonzerodigit (["_"] digit)* | "0"+ (["_"] "0")*
bininteger ::= "0" ("b" | "B") (["_"] bindigit)+
octinteger ::= "0" ("o" | "O") (["_"] octdigit)+
hexinteger ::= "0" ("x" | "X") (["_"] hexdigit)+
nonzerodigit ::= "1"..."9"
digit ::= "0"..."9"
bindigit ::= "0" | "1"
octdigit ::= "0"..."7"
hexdigit ::= digit | "a"..."f" | "A"..."F"
Source: https://docs.python.org/3/reference/lexical_analysis.html#integers

Removing ambiguity in antlr4 lexing

I've been trying to convert a grammar that I have found online into an antlr4 format. The original grammar is here: https://github.com/cv/asp-parser/blob/master/vbscript.bnf.
Short Version:
The current problem that I am running across I think is due to ambiguity at the lexing stage.
For example, I copied the rule for a floating point literal as below:
float_literal : DIGIT* '.' DIGIT+ ( 'e' PLUS_OR_MINUS? DIGIT+ )?
| DIGIT+ 'e' PLUS_OR_MINUS? DIGIT+;
Further up in the file I have a definition for letters:
LETTER: 'a'..'z';
It seems that because I am using 'e' in the float literal, that character can't be recognised as a letter? In my research I have come across the idea of having a token for each letter, so letter would become:
letter: A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | R | S | T | U | V | W | X | Y | Z;
And I would replace any instances of 'e' with E. However there are much longer strings in this file, such as '.and'. So this approach would require replacing things like that with DOT A N D? Which doesn't seem right at all.
Am I doing something fundamentally wrong or is there something I can do to avoid this ambiguity?
Thanks,
Craig
The full grammar is below.
grammar vbscript;
/*===== Character Sets =====*/
SPACES: ' ' -> skip;
DIGIT: '0'..'9';
SEMI_COLON: ':';
NEW_LINE_CHARACTER: [\r\n]+;
WHITESPACE_CHARACTER: [ \t];
LETTER: 'a'..'z';
QUOTE: '"';
HASH: '#';
SQUARE_BRACE: '[' | ']';
PLUS_OR_MINUS: [+-];
ANYTHING_ELSE: ~('"' | '#');
ws: WHITESPACE_CHARACTER;
id_tail: (DIGIT | LETTER | '_');
string_character: ANYTHING_ELSE | DIGIT | WHITESPACE_CHARACTER | SEMI_COLON | LETTER | PLUS_OR_MINUS | SQUARE_BRACE;
id_name_char: ANYTHING_ELSE | DIGIT | WHITESPACE_CHARACTER | SEMI_COLON | LETTER | PLUS_OR_MINUS;
/*===== terminals =====*/
whitespace: ws+ | '_' ws* new_line?;
comment_line : '' | 'rem';
string_literal : '"' ( string_character | '""' )* '"';
float_literal : DIGIT* '.' DIGIT+ ( 'e' PLUS_OR_MINUS? DIGIT+ )?
| DIGIT+ 'e' PLUS_OR_MINUS? DIGIT+;
id : LETTER id_tail*
| '[' id_name_char* ']';
iddot : LETTER id_tail* '.'
| '[' id_name_char* ']' '.'
| 'and.'
| 'byref.'
| 'byval.'
| 'call.'
| 'case.'
| 'class.'
| 'const.'
| 'default.'
| 'dim.'
| 'do.'
| 'each.'
| 'else.'
| 'elseif.'
| 'empty.'
| 'end.'
| 'eqv.'
| 'erase.'
| 'error.'
| 'exit.'
| 'explicit.'
| 'false.'
| 'for.'
| 'function.'
| 'get.'
| 'goto.'
| 'if.'
| 'imp.'
| 'in.'
| 'is.'
| 'let.'
| 'loop.'
| 'mod.'
| 'new.'
| 'next.'
| 'not.'
| 'nothing.'
| 'null.'
| 'on.'
| 'option.'
| 'or.'
| 'preserve.'
| 'private.'
| 'property.'
| 'public.'
| 'redim.'
| 'rem.'
| 'resume.'
| 'select.'
| 'set.'
| 'step.'
| 'sub.'
| 'then.'
| 'to.'
| 'true.'
| 'until.'
| 'wend.'
| 'while.'
| 'with.'
| 'xor.';
dot_id : '.' LETTER id_tail*
| '.' '[' id_name_char* ']'
| '.and'
| '.byref'
| '.byval'
| '.call'
| '.case'
| '.class'
| '.const'
| '.default'
| '.dim'
| '.do'
| '.each'
| '.else'
| '.elseif'
| '.empty'
| '.end'
| '.eqv'
| '.erase'
| '.error'
| '.exit'
| '.explicit'
| '.false'
| '.for'
| '.function'
| '.get'
| '.goto'
| '.if'
| '.imp'
| '.in'
| '.is'
| '.let'
| '.loop'
| '.mod'
| '.new'
| '.next'
| '.not'
| '.nothing'
| '.null'
| '.on'
| '.option'
| '.or'
| '.preserve'
| '.private'
| '.property'
| '.public'
| '.redim'
| '.rem'
| '.resume'
| '.select'
| '.set'
| '.step'
| '.sub'
| '.then'
| '.to'
| '.true'
| '.until'
| '.wend'
| '.while'
| '.with'
| '.xor';
dot_iddot : '.' LETTER id_tail* '.'
| '.' '[' id_name_char* ']' '.'
| '.and.'
| '.byref.'
| '.byval.'
| '.call.'
| '.case.'
| '.class.'
| '.const.'
| '.default.'
| '.dim.'
| '.do.'
| '.each.'
| '.else.'
| '.elseif.'
| '.empty.'
| '.end.'
| '.eqv.'
| '.erase.'
| '.error.'
| '.exit.'
| '.explicit.'
| '.false.'
| '.for.'
| '.function.'
| '.get.'
| '.goto.'
| '.if.'
| '.imp.'
| '.in.'
| '.is.'
| '.let.'
| '.loop.'
| '.mod.'
| '.new.'
| '.next.'
| '.not.'
| '.nothing.'
| '.null.'
| '.on.'
| '.option.'
| '.or.'
| '.preserve.'
| '.private.'
| '.property.'
| '.public.'
| '.redim.'
| '.rem.'
| '.resume.'
| '.select.'
| '.set.'
| '.step.'
| '.sub.'
| '.then.'
| '.to.'
| '.true.'
| '.until.'
| '.wend.'
| '.while.'
| '.with.'
| '.xor.';
/*===== rules =====*/
new_line: (SEMI_COLON | NEW_LINE_CHARACTER)+;
program: new_line? global_stmt_list;
/*===== rules: declarations =====*/
class_decl: 'class' extended_id new_line member_decl_list 'end' 'class' new_line;
member_decl_list: member_decl*;
member_decl: field_decl | var_decl | const_decl | sub_decl | function_decl | property_decl;
field_decl:
'private' field_name other_vars_opt new_line
| 'public' field_name other_vars_opt new_line;
field_name: field_id '(' array_rank_list ')' | field_id;
field_id: id | 'default' | 'erase' | 'error' | 'explicit' | 'step';
var_decl: 'dim' var_name other_vars_opt new_line;
var_name: extended_id '(' array_rank_list ')' | extended_id;
other_vars_opt: (',' var_name other_vars_opt)?;
array_rank_list: (int_literal ',' array_rank_list | int_literal)?;
const_decl: access_modifier_opt 'const' const_list new_line;
const_list: extended_id '=' const_expr_def ',' const_list | extended_id '=' const_expr_def;
const_expr_def: '(' const_expr_def ')'
| '-' const_expr_def
| '+' const_expr_def
| const_expr;
sub_decl:
method_access_opt 'sub' extended_id method_arg_list new_line method_stmt_list 'end' 'sub' new_line
| method_access_opt 'sub' extended_id method_arg_list inline_stmt 'end' 'sub' new_line;
function_decl:
method_access_opt 'function' extended_id method_arg_list new_line method_stmt_list 'end' 'function' new_line
| method_access_opt 'function' extended_id method_arg_list inline_stmt 'end' 'function' new_line;
method_access_opt: 'public' 'default' | access_modifier_opt;
access_modifier_opt: ('public' | 'private')?;
method_arg_list: ('(' arg_list? ')')?;
arg_list: arg (',' arg_list)?;
arg: arg_modifier_opt extended_id ('(' ')')?;
arg_modifier_opt: ('byval' | 'byref')?;
property_decl: method_access_opt 'property' property_access_type extended_id method_arg_list new_line method_stmt_list 'end' 'property' new_line;
property_access_type: 'get' | 'let' | 'set';
/*===== rules: statements =====*/
global_stmt: option_explicit | class_decl | field_decl | const_decl | sub_decl | function_decl | block_stmt;
method_stmt: const_decl | block_stmt;
block_stmt:
var_decl
| redim_stmt
| if_stmt
| with_stmt
| select_stmt
| loop_stmt
| for_stmt
| inline_stmt new_line;
inline_stmt:
assign_stmt
| call_stmt
| sub_call_stmt
| error_stmt
| exit_stmt
| 'erase' extended_id;
global_stmt_list: global_stmt_list global_stmt | global_stmt;
method_stmt_list: method_stmt*;
block_stmt_list: block_stmt*;
option_explicit: 'option' 'explicit' new_line;
error_stmt: 'on' 'error' 'resume' 'next' | 'on' 'error' 'goto' int_literal;
exit_stmt: 'exit' 'do' | 'exit' 'for' | 'exit' 'function' | 'exit' 'property' | 'exit' 'sub';
assign_stmt:
left_expr '=' expr
| 'set' left_expr '=' expr
| 'set' left_expr '=' 'new' left_expr;
sub_call_stmt: qualified_id sub_safe_expr? comma_expr_list
| qualified_id sub_safe_expr?
| qualified_id '(' expr ')' comma_expr_list
| qualified_id '(' expr ')'
| qualified_id '(' ')'
| qualified_id index_or_params_list '.' left_expr_tail sub_safe_expr? comma_expr_list
| qualified_id index_or_params_list_dot left_expr_tail sub_safe_expr? comma_expr_list
| qualified_id index_or_params_list '.' left_expr_tail sub_safe_expr?
| qualified_id index_or_params_list_dot left_expr_tail sub_safe_expr?;
call_stmt: 'call' left_expr;
left_expr: qualified_id index_or_params_list '.' left_expr_tail
| qualified_id index_or_params_list_dot left_expr_tail
| qualified_id index_or_params_list
| qualified_id
| safe_keyword_id;
left_expr_tail: qualified_id_tail index_or_params_list '.' left_expr_tail
| qualified_id_tail index_or_params_list_dot left_expr_tail
| qualified_id_tail index_or_params_list
| qualified_id_tail;
qualified_id: iddot qualified_id_tail
| dot_iddot qualified_id_tail
| id
| dot_id;
qualified_id_tail: iddot qualified_id_tail
| id
| keyword_id;
keyword_id: safe_keyword_id
| 'and'
| 'byref'
| 'byval'
| 'call'
| 'case'
| 'class'
| 'const'
| 'dim'
| 'do'
| 'each'
| 'else'
| 'elseif'
| 'empty'
| 'end'
| 'eqv'
| 'exit'
| 'false'
| 'for'
| 'function'
| 'get'
| 'goto'
| 'if'
| 'imp'
| 'in'
| 'is'
| 'let'
| 'loop'
| 'mod'
| 'new'
| 'next'
| 'not'
| 'nothing'
| 'null'
| 'on'
| 'option'
| 'or'
| 'preserve'
| 'private'
| 'public'
| 'redim'
| 'resume'
| 'select'
| 'set'
| 'sub'
| 'then'
| 'to'
| 'true'
| 'until'
| 'wend'
| 'while'
| 'with'
| 'xor';
safe_keyword_id: 'default'
| 'erase'
| 'error'
| 'explicit'
| 'property'
| 'step';
extended_id: safe_keyword_id
| id;
index_or_params_list: index_or_params index_or_params_list
| index_or_params;
index_or_params: '(' expr comma_expr_list ')'
| '(' comma_expr_list ')'
| '(' expr ')'
| '(' ')';
index_or_params_list_dot: index_or_params index_or_params_list_dot
| index_or_params_dot;
index_or_params_dot: '(' expr comma_expr_list ').'
| '(' comma_expr_list ').'
| '(' expr ').'
| '(' ').';
comma_expr_list: ',' expr comma_expr_list
| ',' comma_expr_list
| ',' expr
| ',';
/* redim statement */
redim_stmt: 'redim' redim_decl_list new_line
| 'redim' 'preserve' redim_decl_list new_line;
redim_decl_list: redim_decl ',' redim_decl_list
| redim_decl;
redim_decl: extended_id '(' expr_list ')';
/* if statement */
if_stmt: 'if' expr 'then' new_line block_stmt_list else_stmt_list 'end' 'if' new_line
| 'if' expr 'then' inline_stmt else_opt end_if_opt new_line;
else_stmt_list: ('elseif' expr 'then' new_line block_stmt_list else_stmt_list
| 'elseif' expr 'then' inline_stmt new_line else_stmt_list
| 'else' inline_stmt new_line
| 'else' new_line block_stmt_list)?;
else_opt: ('else' inline_stmt)?;
end_if_opt : ('end' 'if')?;
/* with statement */
with_stmt: 'with' expr new_line block_stmt_list 'end' 'with' new_line;
/* loop statement */
loop_stmt: 'do' loop_type expr new_line block_stmt_list 'loop' new_line
| 'do' new_line block_stmt_list 'loop' loop_type expr new_line
| 'do' new_line block_stmt_list 'loop' new_line
| 'while' expr new_line block_stmt_list 'wend' new_line;
loop_type: 'while' | 'until';
/* for statement */
for_stmt: 'for' extended_id '=' expr 'to' expr step_opt new_line block_stmt_list 'next' new_line
| 'for' 'each' extended_id 'in' expr new_line block_stmt_list 'next' new_line;
step_opt: ('step' expr)?;
/* select statement */
select_stmt: 'select' 'case' expr new_line cast_stmt_list 'end' 'select' new_line;
cast_stmt_list: ('case' expr_list nl_opt block_stmt_list cast_stmt_list
| 'case' 'else' nl_opt block_stmt_list)?;
nl_opt: new_line?;
expr_list: expr ',' expr_list | expr;
/*===== rules: expressions =====*/
sub_safe_expr: sub_safe_imp_expr;
sub_safe_imp_expr: sub_safe_imp_expr 'imp' eqv_expr | sub_safe_eqv_expr;
sub_safe_eqv_expr: sub_safe_eqv_expr 'eqv' xor_expr
| sub_safe_xor_expr;
sub_safe_xor_expr: sub_safe_xor_expr 'xor' or_expr
| sub_safe_or_expr;
sub_safe_or_expr: sub_safe_or_expr 'or' and_expr
| sub_safe_and_expr;
sub_safe_and_expr : sub_safe_and_expr 'and' not_expr
| sub_safe_not_expr;
sub_safe_not_expr : 'not' not_expr
| sub_safe_compare_expr;
sub_safe_compare_expr : sub_safe_compare_expr 'is' concat_expr
| sub_safe_compare_expr 'is' 'not' concat_expr
| sub_safe_compare_expr '>=' concat_expr
| sub_safe_compare_expr '=>' concat_expr
| sub_safe_compare_expr '<=' concat_expr
| sub_safe_compare_expr '=<' concat_expr
| sub_safe_compare_expr '>' concat_expr
| sub_safe_compare_expr '<' concat_expr
| sub_safe_compare_expr '<>' concat_expr
| sub_safe_compare_expr '=' concat_expr
| sub_safe_concat_expr;
sub_safe_concat_expr : sub_safe_concat_expr '&' add_expr
| sub_safe_add_expr;
sub_safe_add_expr : sub_safe_add_expr '+' mod_expr
| sub_safe_add_expr '-' mod_expr
| sub_safe_mod_expr;
sub_safe_mod_expr : sub_safe_mod_expr 'mod' int_div_expr
| sub_safe_int_div_expr;
sub_safe_int_div_expr : sub_safe_int_div_expr '\\' mult_expr
| sub_safe_mult_expr;
sub_safe_mult_expr : sub_safe_mult_expr '*' unary_expr
| sub_safe_mult_expr '/' unary_expr
| sub_safe_unary_expr;
sub_safe_unary_expr : '-' unary_expr
| '+' unary_expr
| sub_safe_exp_expr;
sub_safe_exp_expr : sub_safe_value '^' exp_expr
| sub_safe_value;
sub_safe_value : const_expr
| left_expr
| '(' expr ')';
expr : imp_expr;
imp_expr : imp_expr 'imp' eqv_expr
| eqv_expr;
eqv_expr : eqv_expr 'eqv' xor_expr
| xor_expr;
xor_expr : xor_expr 'xor' or_expr
| or_expr;
or_expr : or_expr 'or' and_expr
| and_expr;
and_expr : and_expr 'and' not_expr
| not_expr;
not_expr : 'not' not_expr
| compare_expr;
compare_expr : compare_expr 'is' concat_expr
| compare_expr 'is' 'not' concat_expr
| compare_expr '>=' concat_expr
| compare_expr '=>' concat_expr
| compare_expr '<=' concat_expr
| compare_expr '=<' concat_expr
| compare_expr '>' concat_expr
| compare_expr '<' concat_expr
| compare_expr '<>' concat_expr
| compare_expr '=' concat_expr
| concat_expr;
concat_expr : concat_expr '&' add_expr
| add_expr;
add_expr : add_expr '+' mod_expr
| add_expr '-' mod_expr
| mod_expr;
mod_expr : mod_expr 'mod' int_div_expr
| int_div_expr;
int_div_expr : int_div_expr '\\' mult_expr
| mult_expr;
mult_expr : mult_expr '*' unary_expr
| mult_expr '/' unary_expr
| unary_expr;
unary_expr : '-' unary_expr
| '+' unary_expr
| exp_expr;
exp_expr : value '^' exp_expr
| value;
value : const_expr
| left_expr
| '(' expr ')';
const_expr : bool_literal
| int_literal
| float_literal
| string_literal
| nothing;
bool_literal : 'true'
| 'false';
int_literal : DIGIT+;
nothing : 'nothing'
| 'null'
| 'empty';

Your grammar defines "Literals" in the parser section. Note that ANTLR treats every lower case rule as parser rule (upper case rules are lexer rules).
Your small problem part may be solved like this:
FLOAT_LITERAL
: DIGIT* '.' DIGIT+ ( 'e' PLUS_OR_MINUS? DIGIT+ )?
| DIGIT+ 'e' PLUS_OR_MINUS? DIGIT+;
LETTER
: [a-z];
The ANTLR lexer prefers the longest matching rule (if two rules are in conflict, it prefers the first defined one). Both rules are completely disjunct so the sequence of definition is not relevant (its just more readable to define the more complex rules above the basic ones).
You can extend the second definition by upper case Characters:
LETTER
: [a-zA-Z];
To solve the overall problem of your grammar, you will need a complete rewriting of your grammar. Most rules of the terminals section should be lexer rules. However the terminals sections seems overly populated so that it could also be that some rules are workarounds for non existent parser rules.

Implement a jump function

I'm trying to create an interpreter for my scripting language using ANTLR4. I have yet implemented the standard operations (mul, div, sub, etc...) using visitors and now i've to implement a jump \ Salta function call. Jump(n) FunctionCall ignore n rows after his call. Example:
Fai var1 = 3,var2 = 4;
Fai Salta(1); //my jump() function call
Fai var1=4;
println(var1);
output: 3
This is my current grammar:
grammar TL;
#members{
int salta=0;
}
parse
: block+ EOF
;
block
: DO statement (','statement )* END // Fai a=2,B=e;
; //manca l'if
DO:'Fai';
END:';';
Salta:'Salta';
statement
:assign
|functionCall
|saltaCall
;
functionCall
: Identifier '(' exprList? ')' #identifierFunctionCall
| Println '(' expr? ')' #printlnFunctionCall
| Print '(' expr ')' #printFunctionCall
;
saltaCall
:Salta '(' Number ')' rows[$Number.int]
;
rows[int n]
locals[int i=0;]
:({$i<$n}?END? block {$i++;})*
;
exprList
: expr (',' expr)*
;
assign
:Identifier '=' expr
;
Identifier
: [a-zA-Z_] [a-zA-Z_0-9]*
;
expr
: '-'expr #unaryMinusExpr
| '!'expr #notExpr
| expr '^' expr #powerExpr
| expr '*' expr #multiplyExpr
| expr '/' expr #divideExpr
| expr '%' expr #modulusExpr
| expr '+' expr #addExpr
| expr '-' expr #subtractExpr
| expr '>=' expr #gtEqExpr
| expr '<=' expr #ltEqExpr
| expr '>' expr #gtExpr
| expr '<' expr #ltExpr
| expr '==' expr #eqExpr
| expr 'O' expr #orExpr
| expr 'E' expr #andExpr
| expr '=' expr #eqExpr
| Number #numberExpr
| Bool #boolExpr
| Null #nullExpr
| functionCall #functionCallExpr
| Identifier #identifierExpr
| String #stringExpr
| '(' expr ')' #exprExpr
;
Println:'println';
Print:'print';
Null:'null';
Or : 'O';
And : 'E';
Equals : '==';
NEquals : '!=';
GTEquals : '>=';
LTEquals : '<=';
Pow : '^';
Excl : '!';
GT : '>';
LT : '<';
Add : '+';
Subtract : '-';
Multiply : '*';
Divide : '/';
Modulus : '%';
OBrace : '{';
CBrace : '}';
OBracket : '[';
CBracket : ']';
OParen : '(';
CParen : ')';
Assign : '=';
QMark : '?';
Colon : ':';
Bool
: 'true'
| 'false'
;
Number
: Int ('.' Digit*)?
;
String
: ["] (~["\r\n] | '\\\\' | '\\"')* ["]
| ['] (~['\r\n] | '\\\\' | '\\\'')* [']
;
fragment Int
: [1-9] Digit*
| '0'
;
fragment Digit
: [0-9]
;
fragment NL
: '\n'
;
// ---------SKIP------------
Comment
: ('#' ~[\r\n]* ) -> skip
;
Space
: [ \t\r\n\u000C] -> skip
;
How can I implement that function?

It's easier to have a look at my Mu interpreter. Your jump call would look a lot like a log call:
jump
: JUMP expr SCOL
;
JUMP : 'jump';
Then override the visitJump method and keep track of the value inside the jump(...) call inside your EvalVisitor.
Now all you would need to do is override the visitBlock method and if the recorded value inside value inside the jump(...) is more then 0, don't visit the next expression. Some pseudo code:
public class EvalVisitor extends MuBaseVisitor<Value> {
...
private Double jumpAmount = 0.0;
#Override
public Value visitBlock(#NotNull MuParser.BlockContext ctx) {
for (MuParser.StatContext statContext : ctx.stat()) {
if jumpAmount is more than 0, decrease by 1
else visit (execute) statContext
}
return Value.VOID;
}
#Override
public Value visitJump(#NotNull MuParser.JumpContext ctx) {
Value amount = this.visit(ctx.expr());
jumpAmount = amount.asDouble();
return amount;
}
...
}

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

Xtext : identify when a optional must be call - modeling

in your grammar for RelationDeclaration it seems you miss some optional marking questionmarks (isPrivate?='private')? (varsAreDisjoint?='disj')? (the same problem is all over the grammar) ?= declares the attribute as optional, but only a ? around the rule call makes it actually optional

Related

ANTLR G4 grammar giving error in input validation

How can I modify my EBNF to handle cases like `- not 12`, `not + -1`

int('1.5') VS float('1.5') in python

Removing ambiguity in antlr4 lexing

Implement a jump function

Categories

Resources