This crate provides a binary to compile grammars into Rust code and a library implementing Earley's parsing algorithm to parse input according to the specified grammars.
This crate is `gramatica`. To use it, install it to obtain the `gramatica_compiler` binary and add `gramatica` to the dependencies in your project's `Cargo.toml`.
```toml
[dependencies]
gramatica = "0.1"
```
Then, if you have written a grammar file `example.rsg`, execute `gramatica_compiler example.rsg > example.rs`. Afterwards you may use the generated `example.rs` as a regular Rust source file.
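If you prefer to regenerate the parser on every build instead of committing the generated file, a minimal `build.rs` sketch along these lines can invoke the compiler. It assumes `gramatica_compiler` is on your `PATH` and that the grammar lives at `src/example.rsg`; both are choices made for this illustration, not requirements of the crate.

```rust
// build.rs: a hedged sketch that regenerates src/example.rs from src/example.rsg at build time.
use std::fs;
use std::process::Command;

fn main()
{
    let output = Command::new("gramatica_compiler")
        .arg("src/example.rsg")
        .output()
        .expect("failed to run gramatica_compiler");
    if !output.status.success()
    {
        panic!("gramatica_compiler failed: {}", String::from_utf8_lossy(&output.stderr));
    }
    //The compiler writes the generated parser to stdout, so capture it into a source file.
    fs::write("src/example.rs", &output.stdout).expect("could not write generated parser");
    println!("cargo:rerun-if-changed=src/example.rsg");
}
```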
The classical example is to implement a calculator.
```rust
//This is just a Rust header that is copied literally.
extern crate gramatica;
use std::cmp::Ordering;
use gramatica::{Associativity,EarleyKind,State,Parser,ParsingTablesTrait,AmbiguityInfo};

//Here the proper grammar begins.
//These lines are processed by gramatica_compiler to generate the Token enum and the parsing tables.
//We begin with terminal tokens (symbols that are not on the left of any rule but have a literal representation).
//For this example all terminals are regular expressions. The first argument of re_terminal! is the type entry, as used in an enum.
re_terminal!(Num(f64),"[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?");
re_terminal!(Plus,"\+");
re_terminal!(Minus,"-");
re_terminal!(Star,"\*");
re_terminal!(Slash,"/");
re_terminal!(Caret,"\^");
re_terminal!(LPar,"\(");
re_terminal!(RPar,"\)");
re_terminal!(NewLine,"\n");
re_terminal!(_,"\s+");//Otherwise skip spaces

//Now it is the turn of nonterminal tokens. The first one is the default start symbol.
//Their rules are written as match clauses, with the pattern being the reduction of the nonterminal token
//and the expression being the value the token takes when reducing.
//In this case the type of the symbol is empty and so is the expression.
nonterminal Input
{
    () => (),
    (Input,Line) => (),
}

//Although the value type of Line is empty, we may have code executed on the reduction.
nonterminal Line
{
    (NewLine) => (),
    (Expression(value), NewLine) =>
    {
        println!("{}",value);
    },
}

//Finally a token with a value type. Each rule creates the value in a different way.
//Most rules are annotated to avoid ambiguities.
nonterminal Expression(f64)
{
    (Num(value)) => value,
    #[priority(addition)]
    #[associativity(left)]
    (Expression(l),Plus,Expression(r)) => l+r,
    #[priority(addition)]
    #[associativity(left)]
    (Expression(l),Minus,Expression(r)) => l-r,
    #[priority(multiplication)]
    #[associativity(left)]
    (Expression(l),Star,Expression(r)) => l*r,
    #[priority(multiplication)]
    #[associativity(left)]
    (Expression(l),Slash,Expression(r)) => l/r,
    #[priority(addition)]
    #[associativity(left)]
    (Minus,Expression(value)) => -value,
    #[priority(exponentiation)]
    #[associativity(right)]
    (Expression(l),Caret,Expression(r)) => l.powf(r),
    (LPar,Expression(value),RPar) => value,
}

//The ordering macro-like sets the order of application of the previously annotated rules.
ordering!(exponentiation,multiplication,addition);

//Finally an example of using the grammar to parse some lines from stdin.
//We could do this or something similar in a different file if we desired to.
use std::io::BufRead;
fn main()
{
    let stdin=std::io::stdin();
    for rline in stdin.lock().lines()
    {
        let line=rline.unwrap()+"\n";
        println!("line={}",line);
        //The arms below assume parse returns a Result over the start symbol's value.
        match Parser::<Token,ParsingTables>::parse(&line,None)
        {
            Err(x) => println!("error parsing: {:?}",x),
            Ok(x) => println!("parsed correctly: {:?}",x),
        };
    }
}
```
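As a quick sanity check you could exercise the generated parser directly. The sketch below assumes the `Token` and `ParsingTables` names emitted by `gramatica_compiler` for this grammar and the `Result`-returning `parse` call used above; adjust it if your generated names differ.

```rust
//A hedged test sketch: each input must end with a newline so the Line rule can reduce.
#[test]
fn accepts_simple_expressions()
{
    for line in ["1+2*3\n","(1+2)*3\n","2^3^2\n","-4/2\n"]
    {
        assert!(Parser::<Token,ParsingTables>::parse(line,None).is_ok());
    }
}
```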
To define terminal tokens not expressible with regular expressions you may use the following. It must contain a `_match` function returning an option with the number of chars matched and the value of the token.
```rust
terminal LitChar(char)
{
    fn _match(parser: &mut Parser<Token,ParsingTables>, source:&str) -> Option<(usize,char)>
    {
        let mut characters=source.chars();
        if characters.next()==Some('\'')
        {
            let mut c=characters.next().unwrap();
            //A plain literal such as 'a' spans three chars.
            let mut size=3;
            if c=='\\'
            {
                //An escaped literal such as '\n' spans four chars; keep the escaped character.
                c=characters.next().unwrap();
                size=4;
            }
            if characters.next().unwrap()=='\''
            {
                Some((size,c))
            }
            else
            {
                None
            }
        }
        else
        {
            None
        }
    }
}
```
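A custom terminal is then used in rules just like the macro-generated ones. The rule below is a hypothetical illustration consuming `LitChar`; the `CharList` name is not part of the crate.

```rust
//Collect consecutive character literals into a vector.
nonterminal CharList(Vec<char>)
{
    (LitChar(c)) => vec![c],
    (CharList(ref list),LitChar(c)) =>
    {
        let mut new=list.clone();
        new.push(c);
        new
    },
}
```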
Since version 0.1.1 there is also a `keyword_terminal!` macro:
```rust
keyword_terminal!(Const,"const");
```
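Keyword terminals participate in rules like any other token. A hypothetical rule recognizing `const` declarations could look like this, assuming an `Ident(String)` terminal such as the one used in the XML example below:

```rust
//Reduce `const <identifier>` to the declared name.
nonterminal ConstDecl(String)
{
    (Const,Ident(ref name)) => name.clone(),
}
```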
Each rule is written as a match clause, whose ending expression is the value that the nonterminal token gets after being parsed. For example, to parse a list of statements:
```rust
nonterminal Stmts(Vec<StmtKind>)
{
    (Stmt(ref stmt)) => vec![stmt.clone()],
    (Stmts(ref stmts),Stmt(ref stmt)) =>
    {
        let mut new=stmts.clone();
        new.push(stmt.clone());
        new
    },
}
```
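This fragment presumes that `Stmt` and `StmtKind` are defined elsewhere in the grammar. A hypothetical minimal pairing, reusing the calculator's tokens, might be:

```rust
//A value type for statements; Clone is needed because reductions clone their parts.
#[derive(Clone,Debug)]
enum StmtKind { Expression(f64), Empty }

nonterminal Stmt(StmtKind)
{
    (Expression(value),NewLine) => StmtKind::Expression(value),
    (NewLine) => StmtKind::Empty,
}
```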
Reductions only execute if they are part of the final syntactic tree; side effects such as the `println!` in the `Line` rule above therefore run only for the accepted parse.
To avoid ambiguities you have two options: ensure the grammar does not contain them, or prioritize rules by introducing annotations. In the calculator example we have seen two kinds:
- `#[priority(p_name)]` declares a rule with priority `p_name`. Later there should be an `ordering!(p_0,p_1,p_2,...)` macro-like to indicate that `p_0` should reduce before `p_1`.
- `#[associativity(left/right)]` decides how to proceed when nesting the same rule, as the worked examples below illustrate.
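With the calculator's annotations and its `ordering!(exponentiation,multiplication,addition)` line, inputs group as follows (values computed by hand for illustration):

- `2+3*4` groups as `2+(3*4)` and prints `14` (multiplication reduces before addition).
- `2-3-4` groups as `(2-3)-4` and prints `-5` (left associativity).
- `2^3^2` groups as `2^(3^2)` and prints `512` (right associativity).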
A larger example is a parser for JSON values:

```rust
extern crate gramatica;
use std::cmp::Ordering;
use gramatica::{Associativity,EarleyKind,State,Parser,ParsingTablesTrait,AmbiguityInfo};

//See https://www.json.org/
use std::rc::Rc;

//We define an auxiliary type to store JSON values.
//Clone and Debug are needed for the reductions and the final report.
#[derive(Clone,Debug)]
enum JsonValue
{
    Literal(String),
    Number(f64),
    Object(Vec<(String,JsonValue)>),
    Array(Vec<JsonValue>),
    True,
    False,
    Null,
}

// ---- Start of the grammar ----
keyword_terminal!(True,"true");
keyword_terminal!(False,"false");
keyword_terminal!(Null,"null");
re_terminal!(Number(f64),"[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?");

terminal LitStr(String)
{
    //This function has limited escaping capabilities.
    //A minimal sketch of a double-quoted string matcher; the exact implementation may vary.
    fn _match(parser: &mut Parser<Token,ParsingTables>, source:&str) -> Option<(usize,String)>
    {
        let mut characters=source.chars();
        if characters.next()!=Some('"')
        {
            return None;
        }
        let mut result=String::new();
        let mut size=2;//Opening and closing quotes.
        while let Some(c)=characters.next()
        {
            match c
            {
                '"' => return Some((size,result)),
                '\\' =>
                {
                    result.push(characters.next()?);
                    size+=2;
                },
                _ =>
                {
                    result.push(c);
                    size+=1;
                },
            }
        }
        None
    }
}

re_terminal!(LBrace,"\{");
re_terminal!(RBrace,"\}");
re_terminal!(LBracket,"\[");
re_terminal!(RBracket,"\]");
re_terminal!(Comma,",");
re_terminal!(Colon,":");
re_terminal!(_,"\s+|\n");//Otherwise skip spaces

nonterminal Object(JsonValue)
{
    (LBrace,RBrace) => JsonValue::Object(vec![]),
    (LBrace,Members(ref list),RBrace) => JsonValue::Object(list.clone()),
}

nonterminal Members(Vec<(String,JsonValue)>)
{
    (Pair(ref s,ref value)) => vec![(s.clone(),value.clone())],
    //(Pair,Comma,Members) => (),
    (Members(ref list),Comma,Pair(ref s,ref value)) =>
    {
        let mut new=list.clone();
        new.push((s.clone(),value.clone()));
        new
    },
}

nonterminal Pair(String,JsonValue)
{
    (LitStr(ref s),Colon,Value(ref value)) => (s.clone(),value.clone()),
}

nonterminal Array(Vec<JsonValue>)
{
    (LBracket,RBracket) => vec![],
    (LBracket,Elements(ref list),RBracket) => list.clone(),
}

nonterminal Elements(Vec<JsonValue>)
{
    (Value(ref value)) => vec![value.clone()],
    (Elements(ref list),Comma,Value(ref value)) =>
    {
        let mut new=list.clone();
        new.push(value.clone());
        new
    },
}

nonterminal Value(JsonValue)
{
    (LitStr(ref s)) => JsonValue::Literal(s.clone()),
    (Number(v)) => JsonValue::Number(v),
    (Object(ref value)) => value.clone(),
    (Array(ref list)) => JsonValue::Array(list.clone()),
    (True) => JsonValue::True,
    (False) => JsonValue::False,
    (Null) => JsonValue::Null,
}
// ---- End of the grammar ----

use std::io::{BufRead,Read};
//As an example, we parse stdin for a JSON object.
fn main()
{
    let stdin=std::io::stdin();
    let mut buf=String::new();
    stdin.lock().read_to_string(&mut buf).expect("could not read stdin");
    //The arms below assume parse returns a Result over the start symbol's value.
    match Parser::<Token,ParsingTables>::parse(&buf,None)
    {
        Err(x) => println!("error parsing: {:?}",x),
        Ok(x) => println!("parsed correctly: {:?}",x),
    };
}
```
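As a follow-up, a small helper outside the grammar can turn a parsed `JsonValue` back into text for eyeballing the result. The `render` name is only illustrative, and the function does not re-escape string contents:

```rust
//Render a JsonValue back to (unescaped) JSON text.
fn render(value:&JsonValue) -> String
{
    match value
    {
        JsonValue::Literal(s) => format!("\"{}\"",s),
        JsonValue::Number(v) => format!("{}",v),
        JsonValue::Object(members) =>
        {
            let inner:Vec<String>=members.iter().map(|(k,v)| format!("\"{}\":{}",k,render(v))).collect();
            format!("{{{}}}",inner.join(","))
        },
        JsonValue::Array(items) =>
        {
            let inner:Vec<String>=items.iter().map(render).collect();
            format!("[{}]",inner.join(","))
        },
        JsonValue::True => "true".to_string(),
        JsonValue::False => "false".to_string(),
        JsonValue::Null => "null".to_string(),
    }
}
```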
Another example is a very basic XML grammar:

```rust
//A very basic xml grammar
extern crate gramatica;
use std::cmp::Ordering;
use gramatica::{Associativity,EarleyKind,State,Parser,ParsingTablesTrait,AmbiguityInfo};

//See https://www.w3.org/People/Bos/meta-bnf
//Also http://cs.lmu.edu/~ray/notes/xmlgrammar/
use std::rc::Rc;

//We define an auxiliary type to store XML elements.
#[derive(Clone,Debug)]
struct XMLElement
{
    name: String,
    attrs: Vec<(String,String)>,
    contents: Vec<XMLContent>,
}

#[derive(Clone,Debug)]
enum XMLContent
{
    Element(XMLElement),
    Data(String),
}

// ---- Start of the grammar ----
re_terminal!(Space(String),"(\s|\n)+");
re_terminal!(Ident(String),"[a-zA-Z\x80-\xff][a-zA-Z0-9\x80-\xff]*");

terminal LitStr(String)
{
    //A minimal sketch of a double-quoted string matcher, as in the JSON example above.
    fn _match(parser: &mut Parser<Token,ParsingTables>, source:&str) -> Option<(usize,String)>
    {
        let mut characters=source.chars();
        if characters.next()!=Some('"')
        {
            return None;
        }
        let mut result=String::new();
        let mut size=2;//Opening and closing quotes.
        while let Some(c)=characters.next()
        {
            match c
            {
                '"' => return Some((size,result)),
                '\\' =>
                {
                    result.push(characters.next()?);
                    size+=2;
                },
                _ =>
                {
                    result.push(c);
                    size+=1;
                },
            }
        }
        None
    }
}

re_terminal!(CloseEmpty,"/>");
re_terminal!(BeginClose,"</");
re_terminal!(Equal,"=");
re_terminal!(LT,"<");
re_terminal!(GT,">");
re_terminal!(Other(char),".");

nonterminal Document(XMLElement)
{
    (Element(ref elem)) => elem.clone(),
}

nonterminal Element(XMLElement)
{
    (EmptyElemTag(ref name,ref attrs)) => XMLElement{name:name.clone(),attrs:attrs.clone(),contents:vec![]},
    (STag(ref name, ref attrs),Content(ref content),ETag) => XMLElement{name:name.clone(),attrs:attrs.clone(),contents:content.clone()},
}

nonterminal EmptyElemTag(String,Vec<(String,String)>)
{
    (LT,Ident(ref name),Attributes(ref attrs),MaybeSpace,CloseEmpty) => (name.clone(),attrs.clone()),
}

nonterminal Attributes(Vec<(String,String)>)
{
    () => vec![],
    (Attributes(ref attrs),Space,Attribute(ref a, ref b)) =>
    {
        let mut new=attrs.clone();
        new.push((a.clone(),b.clone()));
        new
    },
}

nonterminal Attribute(String,String)
{
    (Ident(ref a),Equal,LitStr(ref b)) => (a.clone(),b.clone()),
}

nonterminal STag(String,Vec<(String,String)>)
{
    (LT,Ident(ref name),Attributes(ref attrs),MaybeSpace,GT) => (name.clone(),attrs.clone()),
}

nonterminal ETag(String)
{
    (BeginClose,Ident(ref s),MaybeSpace,GT) => s.clone(),
}

//The two content rules are a sketch: element content is character data optionally
//interleaved with child elements, in the spirit of the XML `content` production.
nonterminal Content(Vec<XMLContent>)
{
    (Contents(ref list),CharData(ref data)) =>
    {
        let mut new=list.clone();
        if !data.is_empty()
        {
            new.push(XMLContent::Data(data.clone()));
        }
        new
    },
}

nonterminal Contents(Vec<XMLContent>)
{
    () => vec![],
    (Contents(ref list),CharData(ref data),Element(ref elem)) =>
    {
        let mut new=list.clone();
        if !data.is_empty()
        {
            new.push(XMLContent::Data(data.clone()));
        }
        new.push(XMLContent::Element(elem.clone()));
        new
    },
}

nonterminal MaybeSpace
{
    () => (),
    (Space) => (),
}

nonterminal CharData(String)
{
    () => String::new(),
    (CharData(ref s),Space(ref o)) => format!("{}{}",s,o),
    (CharData(ref s),Ident(ref o)) => format!("{}{}",s,o),
    (CharData(ref s),Equal) => format!("{}=",s),
    (CharData(ref s),Other(o)) => format!("{}{}",s,o),
}
// ---- End of the grammar ----

use std::io::{BufRead,Read};
//As an example, we parse stdin for an XML element.
fn main()
{
    let stdin=std::io::stdin();
    let mut buf=String::new();
    stdin.lock().read_to_string(&mut buf).expect("could not read stdin");
    //The arms below assume parse returns a Result over the start symbol's value.
    match Parser::<Token,ParsingTables>::parse(&buf,None)
    {
        Err(x) => println!("error parsing: {:?}",x),
        Ok(x) => println!("parsed correctly: {:?}",x),
    };
}
```
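As with the JSON example, a small helper outside the grammar can inspect the result. The `element_names` function below is only illustrative and assumes the `XMLElement`/`XMLContent` types defined above:

```rust
//Collect the name of an element and of all its descendants, depth-first.
fn element_names(elem:&XMLElement, out:&mut Vec<String>)
{
    out.push(elem.name.clone());
    for content in &elem.contents
    {
        if let XMLContent::Element(ref child)=*content
        {
            element_names(child,out);
        }
    }
}
```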