Improve non iterable scalar scoping (#243)

This commit is contained in:
Mike Voronov 2022-04-15 22:25:03 +03:00 committed by GitHub
parent f1600075d8
commit 06d275ea16
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
25 changed files with 1008 additions and 258 deletions

View File

@ -1,3 +1,14 @@
## Version 0.22.0 (2022-04-14)
[PR 243](https://github.com/fluencelabs/aquavm/pull/243):
Clean scalars at the end of scope, only one next in a fold over scalar is possible now
[PR 231](https://github.com/fluencelabs/aquavm/pull/231):
Test refactoring
[PR 228](https://github.com/fluencelabs/aquavm/pull/228):
Improve stream determinism
## Version 0.21.0 (2021-02-26)
[PR 225](https://github.com/fluencelabs/aquavm/pull/225):

5
Cargo.lock generated
View File

@ -13,7 +13,7 @@ dependencies = [
[[package]]
name = "air"
version = "0.21.0"
version = "0.22.0"
dependencies = [
"air-execution-info-collector",
"air-interpreter-data",
@ -34,6 +34,7 @@ dependencies = [
"log",
"maplit",
"marine-rs-sdk",
"non-empty-vec",
"once_cell",
"polyplets",
"pretty_assertions",
@ -51,7 +52,7 @@ version = "0.1.0"
[[package]]
name = "air-interpreter"
version = "0.21.0"
version = "0.22.0"
dependencies = [
"air",
"air-log-targets",

View File

@ -1,6 +1,6 @@
[package]
name = "air-interpreter"
version = "0.21.0"
version = "0.22.0"
description = "Crate-wrapper for air"
authors = ["Fluence Labs"]
edition = "2018"

View File

@ -1,6 +1,6 @@
[package]
name = "air"
version = "0.21.0"
version = "0.22.0"
description = "Interpreter of AIR scripts intended to coordinate request flow in the Fluence network"
authors = ["Fluence Labs"]
edition = "2018"
@ -32,6 +32,7 @@ serde_json = "1.0.61"
boolinator = "2.4.0"
concat-idents = "1.1.3"
maplit = "1.0.2"
non-empty-vec = "0.2.3"
log = "0.4.11"
fstrings = "0.2.3"
thiserror = "1.0.23"

View File

@ -90,7 +90,7 @@ fn update_context<'ctx>(
exec_ctx
.streams
.add_stream_value(result, generation, stream.name, stream.position)
.map(|generation| Some(generation))
.map(Some)
}
}
}
@ -107,6 +107,6 @@ fn maybe_update_trace(
return;
}
let final_ap_result = to_ap_result(&merger_ap_result, maybe_generation);
let final_ap_result = to_ap_result(merger_ap_result, maybe_generation);
trace_ctx.meet_ap_end(final_ap_result);
}

View File

@ -41,8 +41,10 @@ impl<'i> super::ExecutableInstruction<'i> for Next<'i> {
let next_instr = fold_state.instr_head.clone();
maybe_meet_iteration_start(self, fold_state, trace_ctx)?;
exec_ctx.scalars.meet_next_before();
next_instr.execute(exec_ctx, trace_ctx)?;
exec_ctx.scalars.meet_next_after();
// get the same fold state again because of borrow checker
let fold_state = exec_ctx.scalars.get_iterable_mut(iterator_name)?;

View File

@ -20,9 +20,14 @@ use crate::execution_step::ExecutionResult;
use crate::execution_step::FoldState;
use crate::execution_step::ValueAggregate;
use non_empty_vec::NonEmpty;
use std::collections::HashMap;
use std::collections::HashSet;
use std::rc::Rc;
// TODO: move this code snippet to documentation when it's ready
/// There are two scopes for variable scalars in AIR: global and local. A local scope
/// is the scope inside a fold block; any other scope is the global one. It means that a scalar
/// in an upper fold block could be shadowed by a scalar with the same name in a lower
@ -62,11 +67,49 @@ use std::rc::Rc;
/// This struct is intended to provide abilities to work with scalars as it was described.
#[derive(Default)]
pub(crate) struct Scalars<'i> {
// this one is optimized for speed (not for memory), because it's unexpected
// that a script could have a lot of inner folds.
pub values: HashMap<String, Vec<Option<ValueAggregate>>>,
pub iterable_values: HashMap<String, FoldState<'i>>,
pub fold_block_id: usize,
// TODO: use Rc<String> to avoid copying
/// Terminology used here (mainly to resolve concerns re difference between scalars and values):
/// - scalar is an AIR scalar, iterable and non iterable. A scalar is addressed by a name.
/// - value is concrete value assigned to scalar on certain depth
/// - scope is a variable scope where variable is visible. If we consider fold as a tree where
/// each next produces a new level, then scope is a level in this tree. Please note that it
/// includes variable defined after next instruction.
/// - depth is a count of seen scopes (or a depth in a tree met in the previous definition)
///
/// The hash map of non iterable variables can be viewed as a sparse matrix, where a row
/// corresponds to a variable name and contains all the values that were set for it, ordered by depth.
/// A column corresponds to a depth and contains all the values that were set at that depth.
///
/// This matrix follows these invariants:
/// - all rows are non empty
/// - global variables have 0 depth
/// - cells in a row are sorted by depth
/// - all depths in cell in one row are unique
pub(crate) non_iterable_variables: HashMap<String, NonEmpty<SparseCell>>,
/// This set contains the depths that are invalidated at the current moment of script execution.
/// They are needed for careful isolation of scopes produced by iterations in fold blocks,
/// namely to prevent non iterable variables defined on one depth from being accessed
/// from another depth.
pub(crate) invalidated_depths: HashSet<usize>,
pub(crate) iterable_variables: HashMap<String, FoldState<'i>>,
/// Count of met scopes at the particular moment of execution.
pub(crate) current_depth: usize,
}
/// One cell of the sparse matrix of non iterable variables: a value paired with the
/// scope depth it was set on.
#[derive(Debug)]
pub(crate) struct SparseCell {
    /// Scope depth where the value was set.
    pub(crate) depth: usize,
    /// The value assigned to the scalar on that depth.
    pub(crate) value: ValueAggregate,
}

impl SparseCell {
    /// Builds a cell holding `value` that was set on the given `depth`.
    pub(crate) fn new(depth: usize, value: ValueAggregate) -> Self {
        SparseCell { depth, value }
    }
}
impl<'i> Scalars<'i> {
@ -76,11 +119,11 @@ impl<'i> Scalars<'i> {
use std::collections::hash_map::Entry::{Occupied, Vacant};
let shadowing_allowed = self.shadowing_allowed();
match self.values.entry(name.into()) {
match self.non_iterable_variables.entry(name.into()) {
Vacant(entry) => {
let mut values = vec![None; self.fold_block_id];
values.push(Some(value));
entry.insert(values);
let cell = SparseCell::new(self.current_depth, value);
let cells = NonEmpty::new(cell);
entry.insert(cells);
Ok(false)
}
@ -90,14 +133,16 @@ impl<'i> Scalars<'i> {
}
let values = entry.into_mut();
let contains_prev_value = values
.get(self.fold_block_id)
.map_or_else(|| false, |value| value.is_none());
// could be considered as lazy erasing
values.resize(self.fold_block_id + 1, None);
values[self.fold_block_id] = Some(value);
Ok(contains_prev_value)
let last_cell = values.last_mut();
if last_cell.depth == self.current_depth {
// just rewrite a value if fold level is the same
last_cell.value = value;
Ok(true)
} else {
let new_cell = SparseCell::new(self.current_depth, value);
values.push(new_cell);
Ok(false)
}
}
}
}
@ -109,7 +154,7 @@ impl<'i> Scalars<'i> {
) -> ExecutionResult<()> {
use std::collections::hash_map::Entry::{Occupied, Vacant};
match self.iterable_values.entry(name.into()) {
match self.iterable_variables.entry(name.into()) {
Vacant(entry) => {
entry.insert(fold_state);
Ok(())
@ -119,31 +164,34 @@ impl<'i> Scalars<'i> {
}
pub(crate) fn remove_iterable_value(&mut self, name: &str) {
self.iterable_values.remove(name);
self.iterable_variables.remove(name);
}
pub(crate) fn get_value(&'i self, name: &str) -> ExecutionResult<&'i ValueAggregate> {
self.values
self.non_iterable_variables
.get(name)
.and_then(|scalars| {
scalars
.iter()
.take(self.fold_block_id + 1)
.rev()
.find_map(|scalar| scalar.as_ref())
.and_then(|values| {
let last_cell = values.last();
let value_not_invalidated = !self.invalidated_depths.contains(&last_cell.depth);
if value_not_invalidated {
Some(&last_cell.value)
} else {
None
}
})
.ok_or_else(|| Rc::new(CatchableError::VariableNotFound(name.to_string())).into())
}
pub(crate) fn get_iterable_mut(&mut self, name: &str) -> ExecutionResult<&mut FoldState<'i>> {
self.iterable_values
self.iterable_variables
.get_mut(name)
.ok_or_else(|| UncatchableError::FoldStateNotFound(name.to_string()).into())
}
pub(crate) fn get(&'i self, name: &str) -> ExecutionResult<ScalarRef<'i>> {
let value = self.get_value(name);
let iterable_value = self.iterable_values.get(name);
let iterable_value = self.iterable_variables.get(name);
match (value, iterable_value) {
(Err(_), None) => Err(CatchableError::VariableNotFound(name.to_string()).into()),
@ -154,44 +202,76 @@ impl<'i> Scalars<'i> {
}
pub(crate) fn meet_fold_start(&mut self) {
self.fold_block_id += 1;
self.current_depth += 1;
}
/// Must be called right before the instruction tree of a `next` is executed:
/// marks the depth that is being left as invalidated and descends one depth deeper.
pub(crate) fn meet_next_before(&mut self) {
    let depth_before_next = self.current_depth;
    self.invalidated_depths.insert(depth_before_next);
    self.current_depth = depth_before_next + 1;
}
/// Must be called right after the instruction tree of a `next` has been executed:
/// ascends one depth back, makes that depth valid again and drops the values
/// that were set on deeper depths.
pub(crate) fn meet_next_after(&mut self) {
    let depth_after_next = self.current_depth - 1;
    self.current_depth = depth_after_next;
    self.invalidated_depths.remove(&depth_after_next);
    self.cleanup_obsolete_values();
}
pub(crate) fn meet_fold_end(&mut self) {
self.fold_block_id -= 1;
if self.fold_block_id == 0 {
// lazy cleanup after exiting from a top fold block to the global scope
self.cleanup()
}
self.current_depth -= 1;
self.cleanup_obsolete_values();
}
pub(crate) fn shadowing_allowed(&self) -> bool {
// shadowing is allowed only inside a fold block, 0 here means that execution flow
// is in a global scope
self.fold_block_id != 0
self.current_depth != 0
}
fn cleanup(&mut self) {
for (_, scalars) in self.values.iter_mut() {
scalars.truncate(self.fold_block_id + 1)
/// Drops the values of non iterable variables that were set on a depth deeper than
/// the current one.
///
/// Only the last cell of every variable is inspected, and at most one cell per variable
/// is removed per call. A variable whose only cell is obsolete is removed from the map
/// entirely. Values set on depth 0 (global ones) are never removed.
fn cleanup_obsolete_values(&mut self) {
    // TODO: it takes O(N) where N is a count of all scalars, but it could be optimized
    // by maintaining array of value indices that should be removed on each depth level

    // copy the depth out so the closure doesn't need to borrow `self`
    let current_depth = self.current_depth;

    // `retain` removes entries in place, so no temporary list of (cloned) names is needed
    self.non_iterable_variables.retain(|_, values| {
        let value_depth = values.last().depth;
        let is_obsolete = !is_global_value(value_depth) && is_value_obsolete(value_depth, current_depth);

        // a non-empty vector can't become empty, so `pop` returns None if it contains
        // 1 element; in that case the whole variable is dropped from the map
        !is_obsolete || values.pop().is_some()
    });
}
}
/// Returns true if the supplied depth is the depth of the global scope, i.e. 0.
///
/// Despite the parameter name, `cleanup_obsolete_values` passes the depth a value
/// was set on here.
fn is_global_value(current_scope_depth: usize) -> bool {
    matches!(current_scope_depth, 0)
}
/// Returns true if a value set on `value_depth` belongs to a scope that is deeper
/// than the scope with depth `current_scope_depth`.
fn is_value_obsolete(value_depth: usize, current_scope_depth: usize) -> bool {
    current_scope_depth < value_depth
}
use std::fmt;
impl<'i> fmt::Display for Scalars<'i> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
writeln!(f, "fold_block_id: {}", self.fold_block_id)?;
writeln!(f, "fold_block_id: {}", self.current_depth)?;
for (name, _) in self.values.iter() {
for (name, _) in self.non_iterable_variables.iter() {
let value = self.get_value(name);
if let Ok(last_value) = value {
writeln!(f, "{} => {}", name, last_value.result)?;
}
}
for (name, _) in self.iterable_values.iter() {
for (name, _) in self.iterable_variables.iter() {
// it's impossible to print an iterable value for now
writeln!(f, "{} => iterable", name)?;
}
@ -199,3 +279,46 @@ impl<'i> fmt::Display for Scalars<'i> {
Ok(())
}
}
#[cfg(test)]
mod test {
    use super::*;

    use polyplets::SecurityTetraplet;
    use serde_json::json;

    use std::num::NonZeroUsize;
    use std::rc::Rc;

    /// Values set inside nested fold scopes must be dropped one by one as the scopes end,
    /// while a value set in the global scope must survive.
    #[test]
    fn test_local_cleanup() {
        let mut scalars = Scalars::default();

        let value_aggregate = ValueAggregate::new(Rc::new(json!(1u64)), Rc::new(SecurityTetraplet::default()), 1);

        // set on depth 0, i.e. in the global scope
        let value_1_name = "name_1";
        scalars.set_value(value_1_name, value_aggregate.clone()).unwrap();

        // the second variable is set on depths 1 and 2
        let value_2_name = "name_2";
        scalars.meet_fold_start();
        scalars.set_value(value_2_name, value_aggregate.clone()).unwrap();
        scalars.meet_fold_start();
        scalars.set_value(value_2_name, value_aggregate.clone()).unwrap();

        let actual_values_count = scalars.non_iterable_variables.get(value_2_name).unwrap().len();
        assert_eq!(actual_values_count, NonZeroUsize::new(2).unwrap());

        // leaving the inner fold drops the value set on depth 2
        scalars.meet_fold_end();
        let actual_values_count = scalars.non_iterable_variables.get(value_2_name).unwrap().len();
        assert_eq!(actual_values_count, NonZeroUsize::new(1).unwrap());

        // leaving the outer fold drops the last value, so the variable disappears
        scalars.meet_fold_end();
        assert!(scalars.non_iterable_variables.get(value_2_name).is_none());

        // the global value is untouched
        let actual_values_count = scalars.non_iterable_variables.get(value_1_name).unwrap().len();
        assert_eq!(actual_values_count, NonZeroUsize::new(1).unwrap());
    }
}

View File

@ -19,5 +19,6 @@ mod errors;
mod join_behaviour;
mod lambda;
mod misc;
mod scopes;
mod streams;
mod tetraplets;

View File

@ -0,0 +1,17 @@
/*
* Copyright 2022 Fluence Labs Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
mod scalars_scope;

View File

@ -0,0 +1,212 @@
/*
* Copyright 2022 Fluence Labs Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use air_test_utils::prelude::*;
use fstrings::f;
use fstrings::format_args_f;
// Checks the trace produced when a scalar (array-1) is set inside an iteration of a fold
// over array-0 and is then folded over inside the same iteration.
#[test]
fn scalars_scope() {
// peer_1 is created with a service built from array_1_content; it is the first element
// of `peers`, so the `call array-0-iterator` of the first iteration targets it
let peer_1_id = "peer_1_id";
let array_1_content = json!(["1", "2"]);
let mut peer_1_vm = create_avm(set_variable_call_service(array_1_content.clone()), peer_1_id);
let some_peer_id = "some_peer_id";
let mut some_peer_vm = create_avm(unit_call_service(), some_peer_id);
// note that no VM is created for peer_2, the second element of `peers`
let set_array_0_peer_id = "set_array_0_peer_id";
let peer_2_id = "peer_2_id";
let peers = json!([peer_1_id, peer_2_id]);
let mut set_array_0_vm = create_avm(set_variable_call_service(peers.clone()), set_array_0_peer_id);
let script = f!(r#"
(seq
(call "{set_array_0_peer_id}" ("" "") [] array-0)
(fold array-0 array-0-iterator
(seq
(par
(call array-0-iterator ("" "") [] array-1)
(null)
)
(seq
(fold array-1 array-1-iterator
(seq
(call "{some_peer_id}" ("" "") [])
(next array-1-iterator)
)
)
(next array-0-iterator)
)
)
)
)"#);
// the script is passed through the peers one after another, each run starts from
// the data produced by the previous one
let init_peer_id = "";
let result = checked_call_vm!(set_array_0_vm, init_peer_id, &script, "", "");
let result = checked_call_vm!(peer_1_vm, init_peer_id, &script, "", result.data);
let result = checked_call_vm!(some_peer_vm, init_peer_id, &script, "", result.data);
let actual_trace = trace_from_result(&result);
// the expected trace contains exactly two results of the call to some_peer (one per
// element of array_1_content) and ends with a request sent by some_peer; no states
// of a fold over array-1 follow the second par
let expected_trace = vec![
executed_state::scalar(peers),
executed_state::par(1, 0),
executed_state::scalar(array_1_content),
executed_state::scalar_string("result from unit_call_service"),
executed_state::scalar_string("result from unit_call_service"),
executed_state::par(1, 0),
executed_state::request_sent_by(some_peer_id),
];
assert_eq!(actual_trace, expected_trace);
}
// Checks which value of a scalar set before `next` is observed by an instruction
// placed after `next` in the same fold body.
#[test]
fn before_after_of_next() {
let set_array_0_peer_id = "set_array_0_peer_id";
let array_0_content = json!([1, 2, 3]);
let mut set_array_0_vm = create_avm(set_variable_call_service(array_0_content.clone()), set_array_0_peer_id);
// the service of this peer returns 0, 1, 2, ... on consecutive calls
let vm_peer_0_id = "vm_peer_0_id";
let counter = std::cell::Cell::new(0);
let vm_peer_0_call_service: CallServiceClosure = Box::new(move |_params| {
let uncelled_request_id = counter.get();
counter.set(uncelled_request_id + 1);
CallServiceResult::ok(json!(uncelled_request_id))
});
let mut peer_0_vm = create_avm(vm_peer_0_call_service, vm_peer_0_id);
let vm_peer_1_id = "vm_peer_1_id";
let mut peer_1_vm = create_avm(echo_call_service(), vm_peer_1_id);
let script = f!(r#"
(seq
(call "{set_array_0_peer_id}" ("" "") [] array-0)
(fold array-0 array-0-iterator
(seq
(call "{vm_peer_0_id}" ("" "") [] local)
(seq
(next array-0-iterator)
(call "{vm_peer_1_id}" ("" "") [local])
)
)
)
)"#);
let init_peer_id = "";
let result = checked_call_vm!(set_array_0_vm, init_peer_id, &script, "", "");
let result = checked_call_vm!(peer_0_vm, init_peer_id, &script, "", result.data);
let result = checked_call_vm!(peer_1_vm, init_peer_id, &script, "", result.data);
let actual_trace = trace_from_result(&result);
// 0, 1, 2 are the values of `local` set before `next` in the three iterations;
// 2, 1, 0 are the results of the calls placed after `next`: they come in the reverse
// order, i.e. each of them uses the `local` set in its own iteration
let expected_trace = vec![
executed_state::scalar(array_0_content),
executed_state::scalar_number(0),
executed_state::scalar_number(1),
executed_state::scalar_number(2),
executed_state::scalar_number(2),
executed_state::scalar_number(1),
executed_state::scalar_number(0),
];
assert_eq!(actual_trace, expected_trace);
}
// Checks shadowing of a scalar named `local` that is set in the global scope (1),
// in an outer fold (2) and in an inner fold (3): every consumer call must see the value
// set in its own scope, as stated by the comments inside the script.
#[test]
fn local_and_global_scalars() {
let set_variable_peer_id = "set_variable_peer_id";
let iterable_content = json!([1i64, 2]);
let mut set_variable_vm = create_avm(
set_variable_call_service(iterable_content.clone()),
set_variable_peer_id,
);
// the service of this peer returns 0, 1, 2, ... on consecutive calls, so every
// assignment of `local` gets a distinct value
let local_setter_peer_id = "local_setter_peer_id";
let counter = std::cell::Cell::new(0);
let local_setter_call_service: CallServiceClosure = Box::new(move |_params| {
let uncelled_request_id = counter.get();
counter.set(uncelled_request_id + 1);
CallServiceResult::ok(json!(uncelled_request_id))
});
let mut local_setter_vm = create_avm(local_setter_call_service, local_setter_peer_id);
let local_consumer_peer_id = "local_consumer_peer_id";
let mut local_consumer_vm = create_avm(echo_call_service(), local_consumer_peer_id);
let script = f!(r#"
(seq
(seq
(seq
(call "{set_variable_peer_id}" ("" "") [] iterable_1)
(call "{set_variable_peer_id}" ("" "") [] iterable_2)
)
(seq
(call "{local_setter_peer_id}" ("" "") [] local) ;; (1)
(fold iterable_1 iterator_1
(seq
(seq
(seq
(call "{local_setter_peer_id}" ("" "") [] local) ;; (2)
(fold iterable_2 iterator_2
(seq
(seq
(call "{local_setter_peer_id}" ("" "") [] local) ;; (3)
(call "{local_consumer_peer_id}" ("" "") [local]) ;; local set by (3) will be used
)
(next iterator_2)
)
)
)
(call "{local_consumer_peer_id}" ("" "") [local]) ;; local set by (2) will be used
)
(next iterator_1)
)
)
)
)
(call "{local_consumer_peer_id}" ("" "") [local]) ;; local set by (1) will be used
)"#);
// the setter and the consumer VMs are run in turns, each run starts from the data
// produced by the previous one
let init_peer_id = "";
let result = checked_call_vm!(set_variable_vm, init_peer_id, &script, "", "");
let result = checked_call_vm!(local_setter_vm, init_peer_id, &script, "", result.data);
let result = checked_call_vm!(local_consumer_vm, init_peer_id, &script, "", result.data);
let result = checked_call_vm!(local_setter_vm, init_peer_id, &script, "", result.data);
let result = checked_call_vm!(local_consumer_vm, init_peer_id, &script, "", result.data);
let result = checked_call_vm!(local_setter_vm, init_peer_id, &script, "", result.data);
let result = checked_call_vm!(local_consumer_vm, init_peer_id, &script, "", result.data);
let result = checked_call_vm!(local_setter_vm, init_peer_id, &script, "", result.data);
let result = checked_call_vm!(local_consumer_vm, init_peer_id, &script, "", result.data);
let actual_trace = trace_from_result(&result);
// every number produced by the setter is followed by the same number echoed by
// the consumer of the corresponding scope; the trailing 0 is the global value set by (1)
let expected_trace = vec![
executed_state::scalar(iterable_content.clone()),
executed_state::scalar(iterable_content.clone()),
executed_state::scalar_number(0),
executed_state::scalar_number(1),
executed_state::scalar_number(2),
executed_state::scalar_number(2),
executed_state::scalar_number(3),
executed_state::scalar_number(3),
executed_state::scalar_number(1),
executed_state::scalar_number(4),
executed_state::scalar_number(5),
executed_state::scalar_number(5),
executed_state::scalar_number(6),
executed_state::scalar_number(6),
executed_state::scalar_number(4),
executed_state::scalar_number(0),
];
assert_eq!(actual_trace, expected_trace);
}

View File

@ -0,0 +1,72 @@
/*
* Copyright 2022 Fluence Labs Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use air_test_utils::prelude::*;
#[test]
// test for github.com/fluencelabs/aquavm/issues/241
// A scalar (array-1) set in one iteration of a fold must not leak into the next iteration.
fn issue_241() {
// peer_1 is created with a service built from array_1_content; it is the first element
// of `peers`, so the `call array-0-iterator` of the first iteration targets it
let peer_1_id = "peer_1_id";
let array_1_content = json!(["1", "2"]);
let mut peer_1_vm = create_avm(set_variable_call_service(array_1_content.clone()), peer_1_id);
let some_peer_id = "some_peer_id";
let mut some_peer_vm = create_avm(unit_call_service(), some_peer_id);
// note that no VM is created for peer_2, the second element of `peers`
let set_array_0_peer_id = "set_array_0_peer_id";
let peer_2_id = "peer_2_id";
let peers = json!([peer_1_id, peer_2_id]);
let mut set_array_0_vm = create_avm(set_variable_call_service(peers.clone()), set_array_0_peer_id);
let script = f!(r#"
(seq
(call "{set_array_0_peer_id}" ("" "") [] array-0)
(fold array-0 array-0-iterator
(par
(call array-0-iterator ("" "") [] array-1)
(seq
(fold array-1 array-1-iterator
(seq
(call "{some_peer_id}" ("" "") [])
(next array-1-iterator)
)
)
(next array-0-iterator)
)
)
)
)
"#);
// the script is passed through the peers one after another, each run starts from
// the data produced by the previous one
let init_peer_id = "";
let result = checked_call_vm!(set_array_0_vm, init_peer_id, &script, "", "");
let result = checked_call_vm!(peer_1_vm, init_peer_id, &script, "", result.data);
let result = checked_call_vm!(some_peer_vm, init_peer_id, &script, "", result.data);
let actual_trace = trace_from_result(&result);
let expected_trace = vec![
executed_state::scalar(peers),
executed_state::par(1, 4),
executed_state::scalar(array_1_content),
executed_state::scalar_string("result from unit_call_service"),
executed_state::scalar_string("result from unit_call_service"),
executed_state::par(1, 0),
// before 0.22.0 scalars weren't cleared at the end of a fold block, so more states
// from the second iteration of the fold over array-1 appeared here
executed_state::request_sent_by(some_peer_id),
];
assert_eq!(actual_trace, expected_trace);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright 2020 Fluence Labs Limited
* Copyright 2021 Fluence Labs Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -24,3 +24,4 @@ mod issue_211;
mod issue_214;
mod issue_216;
mod issue_221;
mod issue_241;

View File

@ -16,10 +16,8 @@
use super::air;
use super::lexer::AIRLexer;
use super::lexer::LexerError;
use super::lexer::Token;
use super::ParserError;
use crate::ast::Instruction;
use crate::parser::VariableValidator;
use air::AIRParser;
@ -120,75 +118,6 @@ fn pretty_expected(expected: Vec<String>) -> String {
}
fn parser_error_to_label(file_id: usize, error: ParserError) -> Label<usize> {
use ParserError::*;
match error {
LexerError(error) => lexical_error_to_label(file_id, error),
LambdaAppliedToStream(start, end) => {
Label::primary(file_id, start..end).with_message(error.to_string())
}
UndefinedIterable(start, end, _) => {
Label::primary(file_id, start..end).with_message(error.to_string())
}
UndefinedVariable(start, end, _) => {
Label::primary(file_id, start..end).with_message(error.to_string())
}
AmbiguousFailLastError(start, end) => {
Label::primary(file_id, start..end).with_message(error.to_string())
}
InvalidCallTriplet(start, end) => {
Label::primary(file_id, start..end).with_message(error.to_string())
}
IteratorRestrictionNotAllowed(start, end, _) => {
Label::primary(file_id, start..end).with_message(error.to_string())
}
MultipleIterableValues(start, end, _) => {
Label::primary(file_id, start..end).with_message(error.to_string())
}
}
}
fn lexical_error_to_label(file_id: usize, error: LexerError) -> Label<usize> {
use LexerError::*;
match error {
UnclosedQuote(start, end) => {
Label::primary(file_id, start..end).with_message(error.to_string())
}
EmptyString(start, end) => {
Label::primary(file_id, start..end).with_message(error.to_string())
}
IsNotAlphanumeric(start, end) => {
Label::primary(file_id, start..end).with_message(error.to_string())
}
EmptyStreamName(start, end) => {
Label::primary(file_id, start..end).with_message(error.to_string())
}
EmptyVariableOrConst(start, end) => {
Label::primary(file_id, start..end).with_message(error.to_string())
}
InvalidLambda(start, end) => {
Label::primary(file_id, start..end).with_message(error.to_string())
}
UnallowedCharInNumber(start, end) => {
Label::primary(file_id, start..end).with_message(error.to_string())
}
ParseIntError(start, end, _) => {
Label::primary(file_id, start..end).with_message(error.to_string())
}
ParseFloatError(start, end, _) => {
Label::primary(file_id, start..end).with_message(error.to_string())
}
LambdaParserError(start, end, _) => {
Label::primary(file_id, start..end).with_message(error.to_string())
}
LastErrorPathError(start, end, _) => {
Label::primary(file_id, start..end).with_message(error.to_string())
}
TooBigFloat(start, end) => {
Label::primary(file_id, start..end).with_message(error.to_string())
}
LeadingDot(start, end) => {
Label::primary(file_id, start..end).with_message(error.to_string())
}
}
let span = error.span();
Label::primary(file_id, span.left..span.right).with_message(error.to_string())
}

View File

@ -21,7 +21,7 @@ pub(crate) use triplet::try_to_raw_triplet;
#[macro_export]
macro_rules! make_user_error(
($error_type:ident, $start_pos: ident, $token:expr, $end_pos: ident) => { {
let error = crate::parser::ParserError::$error_type($start_pos, $end_pos);
let error = crate::parser::ParserError::$error_type(crate::parser::Span::new($start_pos, $end_pos));
let error = lalrpop_util::ParseError::User { error };
let dropped_tokens = vec![($start_pos, $token, $end_pos)];

View File

@ -15,6 +15,8 @@
*/
use crate::parser::lexer::LexerError;
use crate::parser::Span;
use thiserror::Error as ThisError;
#[derive(ThisError, Debug, Clone, PartialEq, Eq)]
@ -23,26 +25,82 @@ pub enum ParserError {
LexerError(#[from] LexerError),
#[error("lambda can't be applied to streams in this position")]
LambdaAppliedToStream(usize, usize),
LambdaAppliedToStream(Span),
#[error("variable '{2}' wasn't defined")]
UndefinedVariable(usize, usize, String),
#[error("variable '{variable_name}' wasn't defined")]
UndefinedVariable { span: Span, variable_name: String },
#[error("iterable '{2}' wasn't defined")]
UndefinedIterable(usize, usize, String),
#[error("iterable '{variable_name}' wasn't defined")]
UndefinedIterable { span: Span, variable_name: String },
#[error("last error with non-empty path is ambiguous, please use just %last_error%")]
AmbiguousFailLastError(usize, usize),
AmbiguousFailLastError(Span),
/// Semantic errors in a call instructions.
#[error("call should have service id specified by peer part or function part")]
InvalidCallTriplet(usize, usize),
InvalidCallTriplet(Span),
#[error("new can't be applied to a '{2}' because it's an iterator")]
IteratorRestrictionNotAllowed(usize, usize, String),
#[error("new can't be applied to a '{iterator_name}' because it's an iterator")]
IteratorRestrictionNotAllowed { span: Span, iterator_name: String },
#[error("multiple iterable values found for iterable name '{2}'")]
MultipleIterableValues(usize, usize, String),
#[error("multiple iterable values found for iterator name '{iterator_name}'")]
MultipleIterableValuesForOneIterator { span: Span, iterator_name: String },
#[error(
"multiple next instructions for iterator '{iterator_name}' found for one fold, that is prohibited"
)]
MultipleNextInFold { span: Span, iterator_name: String },
}
impl ParserError {
    /// Returns the span of the source text this error refers to.
    ///
    /// For a wrapped lexer error the span is taken from the lexer error itself,
    /// every other variant carries its span directly.
    pub fn span(&self) -> Span {
        match self {
            Self::LexerError(lexer_error) => lexer_error.span(),
            Self::LambdaAppliedToStream(span)
            | Self::AmbiguousFailLastError(span)
            | Self::InvalidCallTriplet(span)
            | Self::UndefinedVariable { span, .. }
            | Self::UndefinedIterable { span, .. }
            | Self::IteratorRestrictionNotAllowed { span, .. }
            | Self::MultipleIterableValuesForOneIterator { span, .. }
            | Self::MultipleNextInFold { span, .. } => *span,
        }
    }

    /// Creates an `UndefinedVariable` error for the variable with the given name.
    pub fn undefined_variable(span: Span, variable_name: impl Into<String>) -> Self {
        let variable_name = variable_name.into();
        Self::UndefinedVariable { span, variable_name }
    }

    /// Creates an `UndefinedIterable` error for the iterable with the given name.
    pub fn undefined_iterable(span: Span, variable_name: impl Into<String>) -> Self {
        let variable_name = variable_name.into();
        Self::UndefinedIterable { span, variable_name }
    }

    /// Creates an `IteratorRestrictionNotAllowed` error for the iterator with the given name.
    pub fn invalid_iterator_restriction(span: Span, iterator_name: impl Into<String>) -> Self {
        let iterator_name = iterator_name.into();
        Self::IteratorRestrictionNotAllowed { span, iterator_name }
    }

    /// Creates a `MultipleIterableValuesForOneIterator` error for the iterator with the given name.
    pub fn multiple_iterables(span: Span, iterator_name: impl Into<String>) -> Self {
        let iterator_name = iterator_name.into();
        Self::MultipleIterableValuesForOneIterator { span, iterator_name }
    }

    /// Creates a `MultipleNextInFold` error for the iterator with the given name.
    pub fn multiple_next_in_fold(span: Span, iterator_name: impl Into<String>) -> Self {
        let iterator_name = iterator_name.into();
        Self::MultipleNextInFold { span, iterator_name }
    }
}
impl From<std::convert::Infallible> for ParserError {

View File

@ -94,7 +94,7 @@ impl<'input> AIRLexer<'input> {
}
}
Some(Err(LexerError::UnclosedQuote(start_pos, self.input.len())))
Some(Err(LexerError::unclosed_quote(start_pos..self.input.len())))
}
#[allow(clippy::unnecessary_wraps)]
@ -173,7 +173,7 @@ fn should_stop(ch: char, round_brackets_balance: i64, open_square_brackets_balan
fn string_to_token(input: &str, start_pos: usize) -> LexerResult<Token> {
match input {
"" => Err(LexerError::EmptyString(start_pos, start_pos)),
"" => Err(LexerError::empty_string(start_pos..start_pos)),
CALL_INSTR => Ok(Token::Call),
AP_INSTR => Ok(Token::Ap),
@ -206,17 +206,15 @@ fn parse_last_error(input: &str, start_pos: usize) -> LexerResult<Token<'_>> {
let last_error_size = last_error_size + 2;
if input.len() <= last_error_size {
return Err(LexerError::LambdaParserError(
start_pos + last_error_size,
start_pos + input.len(),
"lambda AST applied to last error has not enough size".to_string(),
return Err(LexerError::lambda_parser_error(
start_pos + last_error_size..start_pos + input.len(),
"lambda AST applied to last error has not enough size",
));
}
let last_error_accessor = crate::parse_lambda(&input[last_error_size..]).map_err(|e| {
LexerError::LambdaParserError(
start_pos + last_error_size,
start_pos + input.len(),
LexerError::lambda_parser_error(
start_pos + last_error_size..start_pos + input.len(),
e.to_string(),
)
})?;

View File

@ -55,7 +55,7 @@ impl<'input> CallVariableParser<'input> {
let mut string_to_parse_iter = string_to_parse.char_indices().peekable();
let (current_pos, current_char) = match string_to_parse_iter.next() {
Some(pos_and_ch) => pos_and_ch,
None => return Err(LexerError::EmptyVariableOrConst(start_pos, start_pos)),
None => return Err(LexerError::empty_variable_or_const(start_pos..start_pos)),
};
let state = ParserState {
@ -145,7 +145,7 @@ impl<'input> CallVariableParser<'input> {
// filter out +.12 -.2315 variants
if is_first_dot && !self.state.digit_met {
let error_pos = self.pos_in_string_to_parse();
return Err(LexerError::LeadingDot(error_pos, error_pos));
return Err(LexerError::leading_dot(error_pos..error_pos));
}
Ok(is_first_dot)
@ -161,7 +161,7 @@ impl<'input> CallVariableParser<'input> {
fn check_fallback_to_variable(&self) -> LexerResult<()> {
if self.dot_met() {
let error_pos = self.pos_in_string_to_parse();
return Err(LexerError::UnallowedCharInNumber(error_pos, error_pos));
return Err(LexerError::unallowed_char_in_number(error_pos..error_pos));
}
Ok(())
@ -183,7 +183,7 @@ impl<'input> CallVariableParser<'input> {
if self.current_pos() == 0 && self.current_char() == STREAM_START_TAG {
if self.string_to_parse.len() == 1 {
let error_pos = self.pos_in_string_to_parse();
return Err(LexerError::EmptyStreamName(error_pos, error_pos));
return Err(LexerError::empty_stream_name(error_pos..error_pos));
}
self.state.is_first_stream_tag = true;
@ -200,7 +200,7 @@ impl<'input> CallVariableParser<'input> {
fn try_parse_as_alphanumeric(&self) -> LexerResult<()> {
if !self.air_alphanumeric() {
let error_pos = self.pos_in_string_to_parse();
return Err(LexerError::IsNotAlphanumeric(error_pos, error_pos));
return Err(LexerError::is_not_alphanumeric(error_pos..error_pos));
}
Ok(())
@ -209,7 +209,7 @@ impl<'input> CallVariableParser<'input> {
fn try_parse_as_json_path(&mut self) -> LexerResult<()> {
if !self.json_path_allowed_char() && !self.try_parse_as_flattening() {
let error_pos = self.pos_in_string_to_parse();
return Err(LexerError::InvalidLambda(error_pos, error_pos));
return Err(LexerError::invalid_lambda(error_pos..error_pos));
}
Ok(())
@ -227,9 +227,8 @@ impl<'input> CallVariableParser<'input> {
fn try_parse_first_met_dot(&mut self) -> LexerResult<bool> {
if !self.dot_met() && self.current_char() == '.' {
if self.current_pos() == 0 {
return Err(LexerError::LeadingDot(
self.start_pos,
self.pos_in_string_to_parse(),
return Err(LexerError::leading_dot(
self.start_pos..self.pos_in_string_to_parse(),
));
}
self.state.first_dot_met_pos = Some(self.current_pos());
@ -305,9 +304,8 @@ impl<'input> CallVariableParser<'input> {
// +2 to ignore ".$" prefix
let lambda =
crate::parse_lambda(&self.string_to_parse[lambda_start_pos + 2..]).map_err(|e| {
LexerError::LambdaParserError(
self.start_pos + lambda_start_pos,
self.start_pos + self.string_to_parse.len(),
LexerError::lambda_parser_error(
self.start_pos + lambda_start_pos..self.start_pos + self.string_to_parse.len(),
e.to_string(),
)
})?;
@ -321,7 +319,7 @@ impl<'input> CallVariableParser<'input> {
let raw_value = self.string_to_parse;
let number = raw_value.parse::<i64>().map_err(|e| {
let start_pos = self.start_pos;
LexerError::ParseIntError(start_pos, start_pos + raw_value.len(), e)
LexerError::parse_int_error(start_pos..start_pos + raw_value.len(), e)
})?;
let token = Token::I64(number);
@ -335,15 +333,14 @@ impl<'input> CallVariableParser<'input> {
let raw_value = self.string_to_parse;
let start_pos = self.start_pos;
if raw_value.len() > SAFE_FLOAT_SIGNIFICAND_SIZE {
return Err(LexerError::TooBigFloat(
start_pos,
start_pos + raw_value.len(),
return Err(LexerError::too_big_float(
start_pos..start_pos + raw_value.len(),
));
}
let number = raw_value
.parse::<f64>()
.map_err(|e| LexerError::ParseFloatError(start_pos, start_pos + raw_value.len(), e))?;
let number = raw_value.parse::<f64>().map_err(|e| {
LexerError::parse_float_error(start_pos..start_pos + raw_value.len(), e)
})?;
let token = Token::F64(number);
Ok(token)

View File

@ -14,52 +14,140 @@
* limitations under the License.
*/
use crate::parser::Span;
use thiserror::Error as ThisError;
use std::num::ParseFloatError;
use std::num::ParseIntError;
use std::ops::Range;
#[derive(ThisError, Debug, Clone, PartialEq, Eq)]
pub enum LexerError {
#[error("this string literal has unclosed quote")]
UnclosedQuote(usize, usize),
UnclosedQuote(Span),
#[error("empty string aren't allowed in this position")]
EmptyString(usize, usize),
EmptyString(Span),
#[error("only alphanumeric, '_', and '-' characters are allowed in this position")]
IsNotAlphanumeric(usize, usize),
IsNotAlphanumeric(Span),
#[error("a stream name should be non empty")]
EmptyStreamName(usize, usize),
EmptyStreamName(Span),
#[error("this variable or constant shouldn't have empty name")]
EmptyVariableOrConst(usize, usize),
EmptyVariableOrConst(Span),
#[error("invalid character in lambda")]
InvalidLambda(usize, usize),
InvalidLambda(Span),
#[error("a digit could contain only digits or one dot")]
UnallowedCharInNumber(usize, usize),
UnallowedCharInNumber(Span),
#[error("{2}")]
ParseIntError(usize, usize, #[source] ParseIntError),
#[error("{1}")]
ParseIntError(Span, #[source] ParseIntError),
#[error("{2}")]
ParseFloatError(usize, usize, #[source] ParseFloatError),
#[error("{1}")]
ParseFloatError(Span, #[source] ParseFloatError),
// TODO: use LambdaParserError directly here (it'll require introducing a lifetime)
#[error("{2}")]
LambdaParserError(usize, usize, String),
#[error("{se_lambda_parser_error}")]
LambdaParserError {
span: Span,
se_lambda_parser_error: String,
},
#[error("{2} is an incorrect path for %last_error%, only .$.instruction, .$.msg, and .$.peer_id are allowed")]
LastErrorPathError(usize, usize, String),
#[error("{error_path} is an incorrect path for %last_error%, only .$.instruction, .$.msg, and .$.peer_id are allowed")]
LastErrorPathError { span: Span, error_path: String },
#[error("this float is too big, a float could contain less than 12 digits")]
TooBigFloat(usize, usize),
TooBigFloat(Span),
#[error("leading dot without any symbols before - please write 0 if it's float or variable name if it's a lambda")]
LeadingDot(usize, usize),
LeadingDot(Span),
}
impl LexerError {
pub fn span(&self) -> Span {
let span = match self {
Self::UnclosedQuote(span) => span,
Self::EmptyString(span) => span,
Self::IsNotAlphanumeric(span) => span,
Self::EmptyStreamName(span) => span,
Self::EmptyVariableOrConst(span) => span,
Self::InvalidLambda(span) => span,
Self::UnallowedCharInNumber(span) => span,
Self::ParseIntError(span, _) => span,
Self::ParseFloatError(span, _) => span,
Self::LambdaParserError { span, .. } => span,
Self::LastErrorPathError { span, .. } => span,
Self::TooBigFloat(span) => span,
Self::LeadingDot(span) => span,
};
*span
}
pub fn unclosed_quote(range: Range<usize>) -> Self {
Self::UnclosedQuote(range.into())
}
pub fn empty_string(range: Range<usize>) -> Self {
Self::EmptyString(range.into())
}
pub fn is_not_alphanumeric(range: Range<usize>) -> Self {
Self::IsNotAlphanumeric(range.into())
}
pub fn empty_stream_name(range: Range<usize>) -> Self {
Self::EmptyStreamName(range.into())
}
pub fn empty_variable_or_const(range: Range<usize>) -> Self {
Self::EmptyVariableOrConst(range.into())
}
pub fn invalid_lambda(range: Range<usize>) -> Self {
Self::InvalidLambda(range.into())
}
pub fn unallowed_char_in_number(range: Range<usize>) -> Self {
Self::UnallowedCharInNumber(range.into())
}
pub fn parse_int_error(range: Range<usize>, parse_int_error: ParseIntError) -> Self {
Self::ParseIntError(range.into(), parse_int_error)
}
pub fn parse_float_error(range: Range<usize>, parse_float_error: ParseFloatError) -> Self {
Self::ParseFloatError(range.into(), parse_float_error)
}
pub fn lambda_parser_error(
range: Range<usize>,
se_lambda_parser_error: impl Into<String>,
) -> Self {
Self::LambdaParserError {
span: range.into(),
se_lambda_parser_error: se_lambda_parser_error.into(),
}
}
pub fn last_error_path_error(range: Range<usize>, error_path: String) -> Self {
Self::LastErrorPathError {
span: range.into(),
error_path,
}
}
pub fn too_big_float(range: Range<usize>) -> Self {
Self::TooBigFloat(range.into())
}
pub fn leading_dot(range: Range<usize>) -> Self {
Self::LeadingDot(range.into())
}
}
use super::Token;

View File

@ -274,7 +274,7 @@ fn too_big_float_number() {
lexer_test(
FNUMBER,
Single(Err(LexerError::TooBigFloat(0, FNUMBER.len()))),
Single(Err(LexerError::too_big_float(0..FNUMBER.len()))),
);
}
@ -308,13 +308,16 @@ fn lambda() {
fn lambda_path_numbers() {
const LAMBDA: &str = r#"12345.$[$@[]():?.*,"]"#;
lexer_test(LAMBDA, Single(Err(LexerError::UnallowedCharInNumber(6, 6))));
lexer_test(
LAMBDA,
Single(Err(LexerError::unallowed_char_in_number(6..6))),
);
const LAMBDA1: &str = r#"+12345.$[$@[]():?.*,"]"#;
lexer_test(
LAMBDA1,
Single(Err(LexerError::UnallowedCharInNumber(7, 7))),
Single(Err(LexerError::unallowed_char_in_number(7..7))),
);
}
@ -322,13 +325,13 @@ fn lambda_path_numbers() {
fn leading_dot() {
const LEADING_DOT: &str = ".111";
lexer_test(LEADING_DOT, Single(Err(LexerError::LeadingDot(0, 0))));
lexer_test(LEADING_DOT, Single(Err(LexerError::leading_dot(0..0))));
const LEADING_DOT_AFTER_SIGN: &str = "+.1111";
lexer_test(
LEADING_DOT_AFTER_SIGN,
Single(Err(LexerError::LeadingDot(1, 1))),
Single(Err(LexerError::leading_dot(1..1))),
);
}
@ -338,7 +341,7 @@ fn unclosed_quote() {
lexer_test(
UNCLOSED_QUOTE_AIR,
One(4, Err(LexerError::IsNotAlphanumeric(33, 33))),
One(4, Err(LexerError::is_not_alphanumeric(33..33))),
);
}
@ -349,20 +352,26 @@ fn bad_value() {
lexer_test(
INVALID_VALUE,
Single(Err(LexerError::IsNotAlphanumeric(3, 3))),
Single(Err(LexerError::is_not_alphanumeric(3..3))),
);
// value contains ! that only allowed at the end of a lambda expression
const INVALID_VALUE2: &str = r#"value.$![$@[]():?.*,"\]"#;
lexer_test(INVALID_VALUE2, Single(Err(LexerError::InvalidLambda(7, 7))));
lexer_test(
INVALID_VALUE2,
Single(Err(LexerError::invalid_lambda(7..7))),
);
}
#[test]
fn invalid_lambda() {
const INVALID_LAMBDA: &str = r#"value.$%"#;
lexer_test(INVALID_LAMBDA, Single(Err(LexerError::InvalidLambda(7, 7))));
lexer_test(
INVALID_LAMBDA,
Single(Err(LexerError::invalid_lambda(7..7))),
);
}
#[test]
@ -370,7 +379,7 @@ fn invalid_lambda_numbers() {
// this lambda contains all allowed in lambda characters
const LAMBDA: &str = r#"+12345$[$@[]():?.*,"!]"#;
lexer_test(LAMBDA, Single(Err(LexerError::IsNotAlphanumeric(6, 6))));
lexer_test(LAMBDA, Single(Err(LexerError::is_not_alphanumeric(6..6))));
}
#[test]

View File

@ -17,6 +17,8 @@
use serde::Deserialize;
use serde::Serialize;
use std::ops::Range;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Span {
pub left: usize,
@ -37,6 +39,15 @@ impl Span {
}
}
impl From<Range<usize>> for Span {
fn from(range: Range<usize>) -> Self {
Self {
left: range.start,
right: range.end,
}
}
}
use std::cmp::Ordering;
impl PartialOrd for Span {

View File

@ -117,7 +117,10 @@ fn parse_undefined_variable() {
_ => panic!("unexpected error type"),
};
assert!(matches!(parser_error, ParserError::UndefinedVariable(..)));
assert!(matches!(
parser_error,
ParserError::UndefinedVariable { .. }
));
}
}
@ -165,7 +168,10 @@ fn parse_undefined_stream_with_lambda() {
_ => panic!("unexpected error type"),
};
assert!(matches!(parser_error, ParserError::UndefinedVariable(..)));
assert!(matches!(
parser_error,
ParserError::UndefinedVariable { .. }
));
}
#[test]
@ -584,5 +590,8 @@ fn not_defined_scalar_in_lambda() {
_ => panic!("unexpected error type"),
};
assert!(matches!(parser_error, ParserError::UndefinedVariable(..)));
assert!(matches!(
parser_error,
ParserError::UndefinedVariable { .. }
));
}

View File

@ -23,7 +23,7 @@ use air_lambda_ast::ValueAccessor;
use lalrpop_util::ParseError;
#[test]
fn parse_undefined_iterable() {
fn fold_with_undefined_iterable() {
let source_code = r#"
(seq
(call "" ("" "") [] iterable)
@ -55,11 +55,14 @@ fn parse_undefined_iterable() {
_ => panic!("unexpected error type"),
};
assert!(matches!(parser_error, ParserError::UndefinedIterable(..)));
assert!(matches!(
parser_error,
ParserError::UndefinedIterable { .. }
));
}
#[test]
fn parse_fold_with_undefined_iterable() {
fn fold_with_undefined_variable() {
let source_code = r#"
(seq
(null)
@ -91,7 +94,124 @@ fn parse_fold_with_undefined_iterable() {
_ => panic!("unexpected error type"),
};
assert!(matches!(parser_error, ParserError::UndefinedVariable(..)));
assert!(matches!(
parser_error,
ParserError::UndefinedVariable { .. }
));
}
/// A fold over a scalar iterable whose body contains two `next` instructions for the same
/// iterator must be rejected by the validator with exactly one `MultipleNextInFold` error.
#[test]
fn scalar_fold_with_multiple_nexts_inside() {
// `iterable` is defined by the call, then folded over with two `(next i)` in the fold body
let source_code = r#"
(seq
(call "" ("" "") [] iterable)
(fold iterable i
(seq
(next i)
(next i)
)
)
)
"#;
let lexer = crate::AIRLexer::new(source_code);
let parser = crate::AIRParser::new();
let mut errors = Vec::new();
let mut validator = crate::parser::VariableValidator::new();
// parsing itself is expected to succeed; the validator errors are obtained from finalize()
parser
.parse(source_code, &mut errors, &mut validator, lexer)
.expect("parser shouldn't fail");
// shadows the parser error list with the errors produced by the validator
let errors = validator.finalize();
assert_eq!(errors.len(), 1);
let error = &errors[0].error;
// validator errors are expected to be wrapped into the user variant of lalrpop's ParseError
let parser_error = match error {
ParseError::User { error } => error,
_ => panic!("unexpected error type"),
};
assert!(matches!(
parser_error,
ParserError::MultipleNextInFold { .. }
));
}
/// Two sibling (not nested) folds over the same scalar that reuse the iterator name `i`,
/// each with a single `next`, must pass validation without any errors.
#[test]
fn multiple_scalar_folds_with_same_iterator() {
// both folds are placed side by side inside a seq and each one contains one `(next i)`
let source_code = r#"
(seq
(call "" ("" "") [] iterable)
(seq
(fold iterable i
(seq
(null)
(next i)
)
)
(fold iterable i
(seq
(null)
(next i)
)
)
)
)
"#;
let lexer = crate::AIRLexer::new(source_code);
let parser = crate::AIRParser::new();
let mut errors = Vec::new();
let mut validator = crate::parser::VariableValidator::new();
// parsing itself is expected to succeed; the validator errors are obtained from finalize()
parser
.parse(source_code, &mut errors, &mut validator, lexer)
.expect("parser shouldn't fail");
// shadows the parser error list with the errors produced by the validator
let errors = validator.finalize();
assert!(errors.is_empty());
}
/// A fold over a stream (`$stream`) whose body contains two `next` instructions for the same
/// iterator must be rejected by the validator with exactly one `MultipleNextInFold` error.
#[test]
fn stream_fold_with_multiple_nexts_inside() {
// `$stream` is filled by the call, then folded over with two `(next i)` in the fold body
let source_code = r#"
(seq
(call "" ("" "") [] $stream)
(fold $stream i
(seq
(next i)
(next i)
)
)
)
"#;
let lexer = crate::AIRLexer::new(source_code);
let parser = crate::AIRParser::new();
let mut errors = Vec::new();
let mut validator = crate::parser::VariableValidator::new();
// parsing itself is expected to succeed; the validator errors are obtained from finalize()
parser
.parse(source_code, &mut errors, &mut validator, lexer)
.expect("parser shouldn't fail");
// shadows the parser error list with the errors produced by the validator
let errors = validator.finalize();
assert_eq!(errors.len(), 1);
let error = &errors[0].error;
// validator errors are expected to be wrapped into the user variant of lalrpop's ParseError
let parser_error = match error {
ParseError::User { error } => error,
_ => panic!("unexpected error type"),
};
assert!(matches!(
parser_error,
ParserError::MultipleNextInFold { .. }
));
}
#[test]
@ -137,7 +257,7 @@ fn parse_fold_with_multiple_iterator() {
assert!(matches!(
parser_error,
ParserError::MultipleIterableValues(..)
ParserError::MultipleIterableValuesForOneIterator { .. }
));
}

View File

@ -79,6 +79,6 @@ fn iterators_cant_be_restricted() {
assert!(matches!(
parser_error,
ParserError::IteratorRestrictionNotAllowed(..)
ParserError::IteratorRestrictionNotAllowed { .. }
));
}

View File

@ -40,7 +40,7 @@ pub struct VariableValidator<'i> {
/// Contains the most left definition of a variables met in call outputs.
met_variable_definitions: HashMap<&'i str, Span>,
/// Contains iterables met in fold iterables.
/// Contains iterators defined in a fold block.
met_iterator_definitions: MultiMap<&'i str, Span>,
/// These variables from calls and folds haven't been resolved at the first meet.
@ -50,8 +50,12 @@ pub struct VariableValidator<'i> {
/// due to the way how lalrpop work.
unresolved_iterables: MultiMap<&'i str, Span>,
/// Contains all iterator names met in next instructions; they are checked after the whole
/// parsing to ensure that a fold block has no more than one next for its iterator.
multiple_next_candidates: MultiMap<&'i str, Span>,
/// Contains all names that should be checked that they are not iterators.
check_for_non_iterators: Vec<(&'i str, Span)>,
not_iterators_candidates: Vec<(&'i str, Span)>,
}
impl<'i> VariableValidator<'i> {
@ -98,7 +102,7 @@ impl<'i> VariableValidator<'i> {
}
pub(super) fn met_new(&mut self, new: &New<'i>, span: Span) {
self.check_for_non_iterators
self.not_iterators_candidates
.push((new.variable.name(), span));
// new defines a new variable
self.met_variable_definition(&new.variable, span);
@ -110,6 +114,7 @@ impl<'i> VariableValidator<'i> {
// than a corresponding fold instruction with the definition of this iterable, so they're
// just put without a check for being already met
self.unresolved_iterables.insert(iterable_name, span);
self.multiple_next_candidates.insert(iterable_name, span);
}
pub(super) fn met_ap(&mut self, ap: &Ap<'i>, span: Span) {
@ -128,39 +133,13 @@ impl<'i> VariableValidator<'i> {
}
pub(super) fn finalize(self) -> Vec<ErrorRecovery<usize, Token<'i>, ParserError>> {
let mut errors = Vec::new();
for (name, span) in self.unresolved_variables.iter() {
if !self.contains_variable(name, *span) {
add_to_errors(*name, &mut errors, *span, Token::Call);
}
}
for (name, span) in self.unresolved_iterables.iter() {
if !self.contains_iterable(name, *span) {
add_to_errors(*name, &mut errors, *span, Token::Next);
}
}
for (name, span) in self.check_for_non_iterators.iter() {
if self.contains_iterable(name, *span) {
add_to_errors(*name, &mut errors, *span, Token::New);
}
}
for (name, mut spans) in self.met_iterator_definitions.into_iter() {
spans.sort();
let mut prev_span: Option<Span> = None;
for span in spans {
match prev_span {
Some(prev_span) if prev_span.contains_span(span) => {
add_to_errors(name, &mut errors, span, Token::Fold)
}
Some(_) | None => prev_span = Some(span),
}
}
}
errors
ValidatorErrorBuilder::new(self)
.check_undefined_variables()
.check_undefined_iterables()
.check_multiple_next_in_fold()
.check_new_on_iterators()
.check_iterator_for_multiple_definitions()
.build()
}
fn met_args(&mut self, args: &[Value<'i>], span: Span) {
@ -259,38 +238,149 @@ impl<'i> VariableValidator<'i> {
}
}
/// Checks that multimap contains a span for given key such that provided span lies inside it.
fn contains_iterable(&self, key: &str, key_span: Span) -> bool {
let found_spans = match self.met_iterator_definitions.get_vec(key) {
Some(found_spans) => found_spans,
None => return false,
};
found_spans
.iter()
.any(|s| s.left < key_span.left && s.right > key_span.right)
}
fn met_iterator_definition(&mut self, iterator: &Scalar<'i>, span: Span) {
self.met_iterator_definitions.insert(iterator.name, span);
}
}
fn add_to_errors<'err, 'i>(
variable_name: impl Into<String>,
errors: &'err mut Vec<ErrorRecovery<usize, Token<'i>, ParserError>>,
/// Consumes a `VariableValidator` and accumulates the errors found in the data it recorded.
struct ValidatorErrorBuilder<'i> {
/// Errors collected so far, in the order they were found.
errors: Vec<ErrorRecovery<usize, Token<'i>, ParserError>>,
/// The validator whose recorded names and spans are inspected.
validator: VariableValidator<'i>,
}
impl<'i> ValidatorErrorBuilder<'i> {
    /// Wraps `validator` into a builder with an empty error list.
    ///
    /// Iterator definitions are sorted once here, because `find_closest_fold_span` and
    /// `check_iterator_for_multiple_definitions` both rely on that order.
    fn new(validator: VariableValidator<'i>) -> Self {
        let mut builder = Self {
            errors: Vec::new(),
            validator,
        };
        builder.sort_iterator_definitions();

        builder
    }

    /// Sorts the fold spans recorded for every iterator name.
    fn sort_iterator_definitions(&mut self) {
        for (_, spans) in self.validator.met_iterator_definitions.iter_all_mut() {
            spans.sort()
        }
    }

    /// Check that all variables were defined.
    fn check_undefined_variables(mut self) -> Self {
        for (name, span) in self.validator.unresolved_variables.iter() {
            if !self.validator.contains_variable(name, *span) {
                let error = ParserError::undefined_variable(*span, *name);
                add_to_errors(&mut self.errors, *span, Token::Call, error);
            }
        }

        self
    }

    /// Check that all iterables in fold blocks were defined.
    fn check_undefined_iterables(mut self) -> Self {
        for (name, span) in self.validator.unresolved_iterables.iter() {
            if self.find_closest_fold_span(name, *span).is_none() {
                let error = ParserError::undefined_iterable(*span, *name);
                // the dropped token is `next`, as in the check this builder replaced
                add_to_errors(&mut self.errors, *span, Token::Next, error);
            }
        }

        self
    }

    /// Check that a fold block contains not more than one next with a corresponding iterator.
    fn check_multiple_next_in_fold(mut self) -> Self {
        // Approach used here is based on an assumption that each one iterator belongs only to one
        // fold block at any depth. This is checked by check_iterator_for_multiple_definitions and
        // allows to consider only one fold block where this variable was defined. Then an error
        // is produced when more than one next resolves to the same fold block.
        for (name, spans) in self.validator.multiple_next_candidates.iter_all() {
            let mut collected_fold_spans = std::collections::HashSet::new();

            for span in spans {
                let fold_span = match self.find_closest_fold_span(name, *span) {
                    Some(fold_span) => fold_span,
                    // this would be checked in check_undefined_iterables
                    None => continue,
                };

                // insert returns false when this fold block has already been seen for this name
                if !collected_fold_spans.insert(fold_span) {
                    let error = ParserError::multiple_next_in_fold(*span, *name);
                    add_to_errors(&mut self.errors, *span, Token::Next, error);
                }
            }
        }

        self
    }

    /// Check that a new operator wasn't applied to iterators.
    fn check_new_on_iterators(mut self) -> Self {
        for (name, span) in self.validator.not_iterators_candidates.iter() {
            if self.find_closest_fold_span(name, *span).is_some() {
                let error = ParserError::invalid_iterator_restriction(*span, *name);
                add_to_errors(&mut self.errors, *span, Token::New, error);
            }
        }

        self
    }

    /// Check that one iterator belongs to only one fold.
    /// F.e. such cases are prohibited
    /// (fold iterable_1 iterator
    ///     ...
    ///     (fold iterable_2 iterator
    ///         ...
    ///     )
    /// )
    fn check_iterator_for_multiple_definitions(mut self) -> Self {
        // spans are already sorted by `new`, so no additional sorting is needed here
        for (name, spans) in self.validator.met_iterator_definitions.iter_all() {
            let mut prev_span: Option<Span> = None;

            for &span in spans.iter() {
                match prev_span {
                    // this fold lies inside the previously remembered fold with the same iterator
                    Some(prev_span) if prev_span.contains_span(span) => {
                        let error = ParserError::multiple_iterables(span, *name);
                        add_to_errors(&mut self.errors, span, Token::Fold, error);
                    }
                    Some(_) | None => prev_span = Some(span),
                }
            }
        }

        self
    }

    /// Finishes the builder and returns all collected errors.
    fn build(self) -> Vec<ErrorRecovery<usize, Token<'i>, ParserError>> {
        self.errors
    }

    /// Checks that met_iterator_definitions contains a span for given key such that provided
    /// span lies inside it. This function assumes that spans are sorted and that's why it
    /// returns the last matching span in the list as the closest one.
    fn find_closest_fold_span(&self, key: &str, key_span: Span) -> Option<Span> {
        let found_spans = self.validator.met_iterator_definitions.get_vec(key)?;

        // searching from the back yields the same element as the last match of a forward scan
        found_spans
            .iter()
            .rfind(|span| span.contains_span(key_span))
            .cloned()
    }
}
fn add_to_errors<'i>(
errors: &mut Vec<ErrorRecovery<usize, Token<'i>, ParserError>>,
span: Span,
token: Token<'i>,
error: ParserError,
) {
let variable_name = variable_name.into();
let error = match token {
Token::Next => ParserError::UndefinedIterable(span.left, span.right, variable_name),
Token::New => {
ParserError::IteratorRestrictionNotAllowed(span.left, span.right, variable_name)
}
Token::Fold => ParserError::MultipleIterableValues(span.left, span.right, variable_name),
_ => ParserError::UndefinedVariable(span.left, span.right, variable_name),
};
let error = ParseError::User { error };
let dropped_tokens = vec![(span.left, token, span.right)];

View File

@ -30,7 +30,7 @@ pub type ExecutionTrace = Vec<ExecutedState>;
/// have the following format.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InterpreterData {
/// Trace of AIR execution, which contains executed call, par and fold states.
/// Trace of AIR execution, which contains executed call, par, fold, and ap states.
pub trace: ExecutionTrace,
/// Contains maximum generation for each global stream. This info will be used while merging