Basic validator for variables (#76)

This commit is contained in:
vms 2021-03-19 19:15:41 +03:00 committed by GitHub
parent acb00a1b07
commit 52af952dfd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 1319 additions and 396 deletions

7
Cargo.lock generated
View File

@ -13,7 +13,7 @@ dependencies = [
[[package]]
name = "air-parser"
version = "0.5.0"
version = "0.6.0"
dependencies = [
"codespan",
"codespan-reporting",
@ -22,6 +22,7 @@ dependencies = [
"itertools 0.10.0",
"lalrpop",
"lalrpop-util",
"multimap",
"regex",
"serde",
"serde_json",
@ -1225,9 +1226,9 @@ checksum = "bba551d6d795f74a01767577ea8339560bf0a65354e0417b7e915ed608443d46"
[[package]]
name = "multimap"
version = "0.8.2"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1255076139a83bb467426e7f8d0134968a8118844faa755985e077cf31850333"
checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a"
dependencies = [
"serde",
]

View File

@ -1,6 +1,6 @@
[package]
name = "air-parser"
version = "0.5.0"
version = "0.6.0"
authors = ["Fluence Labs"]
edition = "2018"
license = "Apache-2.0"
@ -13,6 +13,7 @@ lalrpop-util = "0.19.5"
regex = "1.4.1"
codespan = "0.9.5"
codespan-reporting = "0.9.5"
multimap = "0.8.3"
# TODO: hide serde behind a feature
serde = { version = "=1.0.118", features = ["rc", "derive"] }

View File

@ -1,7 +1,9 @@
use crate::parser::ast::*;
use crate::parser::air_parser::into_variable_and_path;
use crate::parser::air_parser::make_flattened_error;
use crate::parser::lexer::LexerError;
use crate::parser::ParserError;
use crate::parser::VariableValidator;
use crate::parser::Span;
use crate::parser::lexer::Token;
use crate::parser::lexer::Number;
@ -9,27 +11,40 @@ use lalrpop_util::ErrorRecovery;
use std::rc::Rc;
// the only reason `input` is needed here is to introduce the lifetime for Token
grammar<'err, 'input>(input: &'input str, errors: &'err mut Vec<ErrorRecovery<usize, Token<'input>, LexerError>>);
grammar<'err, 'input, 'v>(input: &'input str, errors: &'err mut Vec<ErrorRecovery<usize, Token<'input>, ParserError>>, validator: &'v mut VariableValidator<'input>);
pub AIR = Instr;
Instr: Box<Instruction<'input>> = {
"(" call <p:PeerPart> <f:FPart> <args:Args> <output:Output?> ")" => {
<left: @L> "(" call <p:PeerPart> <f:FPart> <args:Args> <output:Output?> ")" <right: @R> => {
let output = output.unwrap_or(CallOutputValue::None);
let args = Rc::new(args);
Box::new(Instruction::Call(Call{peer_part: p, function_part: f, args, output}))
let call = Call { peer_part: p, function_part: f, args, output };
let span = Span { left, right };
validator.met_call(&call, span);
Box::new(Instruction::Call(call))
},
"(" seq <l:Instr> <r:Instr> ")" => Box::new(Instruction::Seq(Seq(l, r))),
"(" par <l:Instr> <r:Instr> ")" => Box::new(Instruction::Par(Par(l, r))),
"(" null ")" => Box::new(Instruction::Null(Null)),
"(" fold <iterable:Iterable> <iterator:Alphanumeric> <i:Instr> ")" => {
<left: @L> "(" fold <iterable:Iterable> <iterator:Alphanumeric> <i:Instr> ")" <right: @R> => {
let instruction = Rc::new(*i);
Box::new(Instruction::Fold(Fold{ iterable, iterator, instruction }))
},
"(" next <i:Alphanumeric> ")" => Box::new(Instruction::Next(Next(i))),
let fold = Fold { iterable, iterator, instruction };
let span = Span { left, right };
validator.met_fold(&fold, span);
Box::new(Instruction::Fold(fold))
},
<left: @L> "(" next <i:Alphanumeric> ")" <right: @R> => {
let next = Next(i);
let span = Span { left, right };
validator.met_next(&next, span);
Box::new(Instruction::Next(next))
},
"(" xor <l:Instr> <r:Instr> ")" => Box::new(Instruction::Xor(Xor(l, r))),
@ -104,6 +119,7 @@ Iterable: IterableValue<'input> = {
<s:Alphanumeric> => IterableValue::Variable(s),
<v:JsonPath> => {
let (variable, path) = into_variable_and_path(v.0, v.1, v.2);
let should_flatten = v.2;
IterableValue::JsonPath { variable, path, should_flatten }
},
@ -121,7 +137,7 @@ Matchable: MatchableValue<'input> = {
extern {
type Location = usize;
type Error = LexerError;
type Error = ParserError;
enum Token<'input> {
"(" => Token::OpenRoundBracket,

File diff suppressed because it is too large Load Diff

View File

@ -19,6 +19,10 @@ use super::ast::Instruction;
use super::lexer::AIRLexer;
use super::lexer::LexerError;
use super::lexer::Token;
use super::ParserError;
use crate::parser::VariableValidator;
use air::AIRParser;
use codespan_reporting::diagnostic::{Diagnostic, Label};
use codespan_reporting::files::SimpleFiles;
@ -26,8 +30,6 @@ use codespan_reporting::term;
use codespan_reporting::term::termcolor::{Buffer, ColorChoice, StandardStream};
use lalrpop_util::{ErrorRecovery, ParseError};
use air::AIRParser;
// Caching parser to cache internal regexes, which are expensive to instantiate
// See also https://github.com/lalrpop/lalrpop/issues/269
thread_local!(static PARSER: AIRParser = AIRParser::new());
@ -40,7 +42,13 @@ pub fn parse(air_script: &str) -> Result<Box<Instruction<'_>>, String> {
PARSER.with(|parser| {
let mut errors = Vec::new();
let lexer = AIRLexer::new(air_script);
match parser.parse(air_script, &mut errors, lexer) {
let mut validator = VariableValidator::new();
let result = parser.parse(air_script, &mut errors, &mut validator, lexer);
let validator_errors = validator.finalize();
errors.extend(validator_errors);
match result {
Ok(r) if errors.is_empty() => Ok(r),
Ok(_) => Err(report_errors(file_id, files, errors)),
Err(err) => Err(report_errors(
@ -58,7 +66,7 @@ pub fn parse(air_script: &str) -> Result<Box<Instruction<'_>>, String> {
fn report_errors(
file_id: usize,
files: SimpleFiles<&str, &str>,
errors: Vec<ErrorRecovery<usize, Token<'_>, LexerError>>,
errors: Vec<ErrorRecovery<usize, Token<'_>, ParserError>>,
) -> String {
let labels = errors_to_labels(file_id, errors);
let diagnostic = Diagnostic::error().with_labels(labels);
@ -78,7 +86,7 @@ fn report_errors(
fn errors_to_labels(
file_id: usize,
errors: Vec<ErrorRecovery<usize, Token<'_>, LexerError>>,
errors: Vec<ErrorRecovery<usize, Token<'_>, ParserError>>,
) -> Vec<Label<usize>> {
errors
.into_iter()
@ -98,7 +106,7 @@ fn errors_to_labels(
Label::primary(file_id, location..(location + 1))
.with_message(format!("expected {}", pretty_expected(expected)))
}
ParseError::User { error } => lexical_error_to_label(file_id, error),
ParseError::User { error } => parser_error_to_label(file_id, error),
})
.collect()
}
@ -111,6 +119,23 @@ fn pretty_expected(expected: Vec<String>) -> String {
}
}
fn parser_error_to_label(file_id: usize, error: ParserError) -> Label<usize> {
use ParserError::*;
match error {
LexerError(error) => lexical_error_to_label(file_id, error),
CallArgsNotFlattened(start, end) => {
Label::primary(file_id, start..end).with_message(error.to_string())
}
UndefinedIterable(start, end, _) => {
Label::primary(file_id, start..end).with_message(error.to_string())
}
UndefinedVariable(start, end, _) => {
Label::primary(file_id, start..end).with_message(error.to_string())
}
}
}
fn lexical_error_to_label(file_id: usize, error: LexerError) -> Label<usize> {
use LexerError::*;
match error {
@ -147,9 +172,6 @@ fn lexical_error_to_label(file_id: usize, error: LexerError) -> Label<usize> {
LeadingDot(start, end) => {
Label::primary(file_id, start..end).with_message(error.to_string())
}
CallArgsNotFlattened(start, end) => {
Label::primary(file_id, start..end).with_message(error.to_string())
}
}
}
@ -167,8 +189,8 @@ pub(super) fn make_flattened_error(
start_pos: usize,
token: Token<'_>,
end_pos: usize,
) -> ErrorRecovery<usize, Token<'_>, LexerError> {
let error = LexerError::CallArgsNotFlattened(start_pos, end_pos);
) -> ErrorRecovery<usize, Token<'_>, ParserError> {
let error = ParserError::CallArgsNotFlattened(start_pos, end_pos);
let error = ParseError::User { error };
let dropped_tokens = vec![(start_pos, token, end_pos)];

View File

@ -0,0 +1,39 @@
/*
* Copyright 2020 Fluence Labs Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use crate::parser::lexer::LexerError;
use thiserror::Error as ThisError;
/// Errors that can be reported while parsing an AIR script.
///
/// For every variant except `LexerError`, the first two `usize` fields are the
/// start and end positions of the offending region in the source text.
#[derive(ThisError, Debug, Clone, PartialEq, Eq)]
pub enum ParserError {
/// An error produced by the lexer; it is displayed using the lexer error's own message.
#[error("{0}")]
LexerError(#[from] LexerError),
/// A json path was used in a call triplet without being flattened (no trailing `!`).
#[error("while using json path in call triplet, result should be flattened, add ! at the end")]
CallArgsNotFlattened(usize, usize),
/// A variable was used without a definition; the third field is the variable name.
#[error("variable '{2}' wasn't defined")]
UndefinedVariable(usize, usize, String),
/// An iterable was used without a definition; the third field is the iterable name.
#[error("iterable '{2}' wasn't defined")]
UndefinedIterable(usize, usize, String),
}
/// Formal conversion from the uninhabited `Infallible` type.
///
/// No value of `Infallible` can exist, so this function can never actually be called.
impl From<std::convert::Infallible> for ParserError {
    fn from(never: std::convert::Infallible) -> Self {
        // an empty match is exhaustive for a type without values
        match never {}
    }
}

View File

@ -53,9 +53,29 @@ pub enum LexerError {
#[error("leading dot without any symbols before - please write 0 if it's float or variable name if it's json path")]
LeadingDot(usize, usize),
}
#[error("while using json path in call triplet, result should be flattened, add ! at the end")]
CallArgsNotFlattened(usize, usize),
use super::Token;
use crate::parser::air::__ToTriple;
use crate::parser::ParserError;
impl<'err, 'input, 'i> __ToTriple<'err, 'input, 'i>
for Result<(usize, Token<'input>, usize), LexerError>
{
fn to_triple(
value: Self,
) -> Result<
(usize, Token<'input>, usize),
lalrpop_util::ParseError<usize, Token<'input>, ParserError>,
> {
match value {
Ok(v) => Ok(v),
Err(error) => {
let error = ParserError::LexerError(error);
Err(lalrpop_util::ParseError::User { error })
}
}
}
}
impl From<std::convert::Infallible> for LexerError {

View File

@ -23,6 +23,8 @@ mod lexer;
mod air;
pub mod ast;
mod errors;
mod validator;
#[cfg(test)]
pub mod tests;
@ -30,3 +32,12 @@ pub mod tests;
pub use self::air_parser::parse;
pub use air::AIRParser;
pub use lexer::AIRLexer;
use errors::ParserError;
use validator::VariableValidator;
/// A region of the source text delimited by two positions.
///
/// In the grammar, `left` and `right` are taken from the `@L` and `@R` location
/// markers placed around an instruction.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Span {
/// Position at which the region starts.
pub left: usize,
/// Position at which the region ends.
pub right: usize,
}

View File

@ -15,13 +15,25 @@
*/
use crate::ast;
use crate::parser::AIRParser;
use crate::parser::ParserError;
use ast::Instruction;
use fstrings::f;
use lalrpop_util::ParseError;
use std::rc::Rc;
thread_local!(static TEST_PARSER: AIRParser = AIRParser::new());
fn parse(source_code: &str) -> Instruction {
*crate::parse(source_code).expect("parsing failed")
*TEST_PARSER.with(|parser| {
let mut errors = Vec::new();
let lexer = crate::parser::AIRLexer::new(source_code);
let mut validator = crate::parser::VariableValidator::new();
parser
.parse(source_code, &mut errors, &mut validator, lexer)
.expect("parsing should be successfull")
})
}
#[test]
@ -143,7 +155,7 @@ fn parse_json_path() {
}
#[test]
fn parse_json_path_without_flattening() {
fn parse_undefined_variable() {
let source_code = r#"
(call id.$.a "f" ["hello" name] void[])
"#;
@ -152,12 +164,59 @@ fn parse_json_path_without_flattening() {
let parser = crate::AIRParser::new();
let mut errors = Vec::new();
let mut validator = super::VariableValidator::new();
parser
.parse(source_code, &mut errors, lexer)
.parse(source_code, &mut errors, &mut validator, lexer)
.expect("parser shoudn't fail");
let errors = validator.finalize();
assert_eq!(errors.len(), 2);
for i in 0..2 {
let error = &errors[i].error;
let parser_error = match error {
ParseError::User { error } => error,
_ => panic!("unexpected error type"),
};
assert!(matches!(parser_error, ParserError::UndefinedVariable(..)));
}
}
// Checks that a `next` naming something that no fold declares as its iterator
// is reported as exactly one `UndefinedIterable` error carrying that name.
#[test]
fn parse_undefined_iterable() {
    let source_code = r#"
(seq
(call "" ("" "") [] iterable)
(fold iterable i
(seq
(call "" ("" "") ["hello" ""] void[])
(next j)
)
)
)
"#;

    let lexer = crate::AIRLexer::new(source_code);
    let parser = crate::AIRParser::new();
    let mut errors = Vec::new();
    let mut validator = super::VariableValidator::new();

    parser
        .parse(source_code, &mut errors, &mut validator, lexer)
        .expect("parser shoudn't fail");

    let errors = validator.finalize();
    assert_eq!(errors.len(), 1);

    let parser_error = match &errors[0].error {
        ParseError::User { error } => error,
        _ => panic!("unexpected error type"),
    };

    // the fold declares `i`, so `j` is the only name `next` uses without a definition
    assert!(matches!(
        parser_error,
        ParserError::UndefinedIterable(_, _, name) if name == "j"
    ));
}
#[test]

View File

@ -0,0 +1,239 @@
/*
* Copyright 2020 Fluence Labs Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use super::ast::*;
use crate::parser::lexer::Token;
use crate::parser::ParserError;
use crate::parser::Span;
use lalrpop_util::ErrorRecovery;
use lalrpop_util::ParseError;
use multimap::MultiMap;
use std::collections::HashMap;
/// Intermediate implementation of the variable validator.
///
/// It tracks variables (i.e., names defined as the output of a `call`
/// instruction) and iterators (i.e., the `X` defined by a `fold array X`
/// instruction).
///
/// The validator records every use of an undefined variable or iterable and
/// reports them as errors from `finalize`.
#[derive(Debug, Default, Clone)]
pub struct VariableValidator<'i> {
/// For each variable met as a call output, the leftmost span of a call defining it.
met_variables: HashMap<&'i str, Span>,
/// Spans of the `fold` instructions that define each iterator name.
met_iterators: MultiMap<&'i str, Span>,
/// Variables used in calls and folds that had no known definition when they were first met.
unresolved_variables: MultiMap<&'i str, Span>,
/// Every iterable named by a `next`; they are resolved only after the whole script has been
/// parsed, because a `next` is met before the `fold` that encloses it.
unresolved_iterables: MultiMap<&'i str, Span>,
}
impl<'i> VariableValidator<'i> {
    /// Creates a validator with no recorded definitions or uses.
    pub(super) fn new() -> Self {
        Self::default()
    }

    /// Records a `call`: every variable used in its peer part, function part and
    /// arguments is checked, then its output (if any) is registered as a definition.
    pub(super) fn met_call(&mut self, call: &Call<'i>, span: Span) {
        self.met_peer_part(&call.peer_part, span);
        self.met_function_part(&call.function_part, span);
        self.met_args(&call.args, span);
        self.met_call_output_definition(&call.output, span)
    }

    /// Records a `fold`: its iterable is checked as a variable use and its
    /// iterator is registered as a definition covering the fold's span.
    pub(super) fn met_fold(&mut self, fold: &Fold<'i>, span: Span) {
        self.met_iterable_value(&fold.iterable, span);
        self.met_iterator_definition(&fold.iterator, span);
    }

    /// Records a `next`; the name it refers to is resolved later, in `finalize`.
    pub(super) fn met_next(&mut self, next: &Next<'i>, span: Span) {
        let iterable_name = next.0;
        // the parser reduces inner instructions first, so a `next` is always met
        // before the `fold` that defines its iterable; it is therefore stored
        // without checking whether the iterable has been met already
        self.unresolved_iterables.insert(iterable_name, span);
    }

    /// Re-checks every use that could not be resolved when it was met and
    /// returns one error for each use that still has no suitable definition.
    pub(super) fn finalize(&self) -> Vec<ErrorRecovery<usize, Token<'i>, ParserError>> {
        let mut errors = Vec::new();

        // `MultiMap::iter` yields only the first value stored for a key, so
        // `iter_all` is used to examine every recorded span of every name
        for (name, spans) in self.unresolved_variables.iter_all() {
            for span in spans {
                if !self.contains_variable(name, *span) {
                    add_to_errors(*name, &mut errors, *span, Token::Call);
                }
            }
        }

        for (name, spans) in self.unresolved_iterables.iter_all() {
            for span in spans {
                if !self.contains_iterable(name, *span) {
                    add_to_errors(*name, &mut errors, *span, Token::Next);
                }
            }
        }

        errors
    }

    /// Checks the variables used in the peer part of a call.
    fn met_peer_part(&mut self, peer_part: &PeerPart<'i>, span: Span) {
        match peer_part {
            PeerPart::PeerPk(peer_pk) => self.met_instr_value(peer_pk, span),
            PeerPart::PeerPkWithServiceId(peer_pk, service_id) => {
                self.met_instr_value(peer_pk, span);
                self.met_instr_value(service_id, span);
            }
        }
    }

    /// Checks the variables used in the function part of a call.
    fn met_function_part(&mut self, function_part: &FunctionPart<'i>, span: Span) {
        match function_part {
            FunctionPart::FuncName(func_name) => self.met_instr_value(func_name, span),
            FunctionPart::ServiceIdWithFuncName(service_id, func_name) => {
                self.met_instr_value(service_id, span);
                self.met_instr_value(func_name, span);
            }
        }
    }

    /// Checks the variables used in the arguments of a call.
    fn met_args(&mut self, args: &[CallInstrArgValue<'i>], span: Span) {
        for arg in args {
            self.met_instr_arg_value(arg, span);
        }
    }

    /// Checks a triplet value; only plain variables and json paths refer to a variable.
    fn met_instr_value(&mut self, instr_value: &CallInstrValue<'i>, span: Span) {
        match instr_value {
            CallInstrValue::JsonPath { variable, .. } => self.met_variable(variable, span),
            CallInstrValue::Variable(variable) => self.met_variable(variable, span),
            _ => {}
        }
    }

    /// Checks an argument value; only plain variables and json paths refer to a variable.
    fn met_instr_arg_value(&mut self, instr_arg_value: &CallInstrArgValue<'i>, span: Span) {
        match instr_arg_value {
            CallInstrArgValue::JsonPath { variable, .. } => self.met_variable(variable, span),
            CallInstrArgValue::Variable(variable) => self.met_variable(variable, span),
            _ => {}
        }
    }

    /// Remembers a use of `name` as unresolved unless a definition preceding `span` is known.
    fn met_variable(&mut self, name: &'i str, span: Span) {
        if !self.contains_variable(name, span) {
            self.unresolved_variables.insert(name, span);
        }
    }

    /// Returns true if `key` is defined, either as a call output or as a fold
    /// iterator, by an instruction whose span orders before `key_span`.
    fn contains_variable(&self, key: &str, key_span: Span) -> bool {
        let defined_by_call = self
            .met_variables
            .get(key)
            .map_or(false, |defined_at| *defined_at < key_span);
        if defined_by_call {
            return true;
        }

        self.met_iterators
            .get_vec(key)
            .map_or(false, |folds| folds.iter().any(|fold| *fold < key_span))
    }

    /// Registers the output of a call as a variable definition, keeping the
    /// smallest span (in `Span` order) seen for each name.
    fn met_call_output_definition(&mut self, call_output: &CallOutputValue<'i>, span: Span) {
        let variable_name = match call_output {
            CallOutputValue::Scalar(variable) => variable,
            CallOutputValue::Accumulator(accumulator) => accumulator,
            CallOutputValue::None => return,
        };

        self.met_variables
            .entry(variable_name)
            .and_modify(|known| {
                if *known > span {
                    *known = span;
                }
            })
            .or_insert(span);
    }

    /// Returns true if some fold defining `key` as its iterator has a span that
    /// strictly encloses `key_span`.
    fn contains_iterable(&self, key: &str, key_span: Span) -> bool {
        self.met_iterators.get_vec(key).map_or(false, |folds| {
            folds
                .iter()
                .any(|fold| fold.left < key_span.left && fold.right > key_span.right)
        })
    }

    /// Checks the variable that a fold iterates over.
    fn met_iterable_value(&mut self, iterable_value: &IterableValue<'i>, span: Span) {
        match iterable_value {
            IterableValue::JsonPath { variable, .. } => self.met_variable(variable, span),
            IterableValue::Variable(variable) => self.met_variable(variable, span),
        }
    }

    /// Registers `iterator` as defined by the fold occupying `span`.
    fn met_iterator_definition(&mut self, iterator: &'i str, span: Span) {
        self.met_iterators.insert(iterator, span);
    }
}
use std::cmp::Ordering;
impl PartialOrd for Span {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
let self_min = std::cmp::min(self.left, self.right);
let other_min = std::cmp::min(other.left, other.right);
if self_min < other_min {
Some(Ordering::Less)
} else if self == other {
Some(Ordering::Equal)
} else {
Some(Ordering::Greater)
}
}
}
fn add_to_errors<'err, 'i>(
variable_name: impl Into<String>,
errors: &'err mut Vec<ErrorRecovery<usize, Token<'i>, ParserError>>,
span: Span,
token: Token<'i>,
) {
let variable_name = variable_name.into();
let error = match token {
Token::Next => ParserError::UndefinedIterable(span.left, span.right, variable_name),
_ => ParserError::UndefinedVariable(span.left, span.right, variable_name),
};
let error = ParseError::User { error };
let dropped_tokens = vec![(span.left, token, span.right)];
let error = ErrorRecovery {
error,
dropped_tokens,
};
errors.push(error);
}

View File

@ -175,7 +175,7 @@ mod tests {
let remote_peer_id = String::from("some_remote_peer_id");
let script = format!(
r#"(call "{}" ("local_service_id" "local_fn_name") [value] result_name)"#,
r#"(call "{}" ("local_service_id" "local_fn_name") ["arg"] result_name)"#,
remote_peer_id
);

View File

@ -347,9 +347,6 @@ mod tests {
// Check that fold works with the join behaviour without hanging up.
#[test]
fn fold_with_join() {
use crate::contexts::execution_trace::CallResult::*;
use crate::contexts::execution_trace::ExecutedState::*;
let mut vm = create_aqua_vm(echo_number_call_service(), "A");
let mut set_variable_vm = create_aqua_vm(set_variable_call_service(r#"["1","2"]"#), "set_variable");
@ -357,10 +354,13 @@ mod tests {
r#"
(seq
(call "set_variable" ("" "") [] iterable)
(fold iterable i
(seq
(call "A" ("" "") [non_exist_variable.$.hash!] acc[])
(next i)
(par
(call "unknown_peer" ("" "") [] lazy_def_variable)
(fold iterable i
(seq
(call "A" ("" "") [lazy_def_variable.$.hash!] acc[])
(next i)
)
)
)
)"#,
@ -370,8 +370,7 @@ mod tests {
let res = call_vm!(vm, "", fold_with_join, "", res.data);
let res: ExecutionTrace = serde_json::from_slice(&res.data).expect("should be valid executed trace");
assert_eq!(res.len(), 1);
assert_eq!(res[0], Call(Executed(Rc::new(json!(["1", "2"])))));
assert_eq!(res.len(), 3);
}
#[test]

View File

@ -104,6 +104,8 @@ mod tests {
#[test]
fn xor_var_not_found() {
use crate::contexts::execution_trace::CallResult::*;
use crate::contexts::execution_trace::ExecutedState::*;
use aqua_test_utils::echo_string_call_service;
let local_peer_id = "local_peer_id";
@ -112,7 +114,10 @@ mod tests {
let script = format!(
r#"
(xor
(call "{0}" ("service_id_1" "local_fn_name") [non_existent_variable] result)
(par
(call "unknown_peer" ("service_id_1" "local_fn_name") [] lazy_defined_variable)
(call "{0}" ("service_id_1" "local_fn_name") [lazy_defined_variable] result)
)
(call "{0}" ("service_id_2" "local_fn_name") ["expected"] result)
)"#,
local_peer_id,
@ -120,9 +125,8 @@ mod tests {
let res = call_vm!(vm, "asd", script, "[]", "[]");
let actual_trace: ExecutionTrace = serde_json::from_slice(&res.data).expect("should be valid json");
assert!(actual_trace.is_empty());
assert!(res.next_peer_pks.is_empty());
assert_eq!(actual_trace[0], Par(1, 0));
assert_eq!(actual_trace[1], Call(RequestSentBy(String::from("local_peer_id"))));
}
#[test]