feat(air-parser): canon stream syntax (#618)

feat(air-parser): improved canon stream syntax support [fixes VM-293]
This commit is contained in:
raftedproc 2023-07-25 12:38:10 +03:00 committed by GitHub
parent 6bd60d48e7
commit 8871465324
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 116 additions and 37 deletions

View File

@ -30,11 +30,12 @@ pub(super) fn try_parse_call_variable(
CallVariableParser::try_parse(string_to_parse, start_pos)
}
/// Tag state recorded by the call-variable parser while it scans a name.
#[derive(Debug)]
#[derive(Debug, Clone, Copy)]
enum MetTag {
/// No tag character was recognised.
None,
/// The `$` tag.
Stream,
/// The `%` tag.
StreamMap,
/// The `#` tag on its own; `deduce_tag` turns it into `CanonStream` when `$` follows.
Canon,
/// `#` followed by `$`.
CanonStream,
}
@ -175,7 +176,10 @@ impl<'input> CallVariableParser<'input> {
}
fn try_parse_as_variable(&mut self) -> LexerResult<()> {
if self.try_parse_as_stream_start()? || self.try_parse_as_json_path_start()? {
if self.try_parse_as_canon()?
|| self.try_parse_as_stream()?
|| self.try_parse_as_json_path_start()?
{
return Ok(());
} else if self.is_json_path_started() {
self.try_parse_as_json_path()?;
@ -186,15 +190,31 @@ impl<'input> CallVariableParser<'input> {
Ok(())
}
/// Checks whether the current character is a tag sitting at offset 0 and, if so,
/// records it in `self.state.met_tag`.
///
/// Returns `Ok(true)` when a tag was recorded, `Ok(false)` when the current
/// character is not a leading tag, and an error when the tag is the only
/// character of the string being parsed.
fn try_parse_as_stream_start(&mut self) -> LexerResult<bool> {
let stream_tag = MetTag::from_tag(self.current_char());
if self.current_offset() == 0 && stream_tag.is_tag() {
fn try_parse_as_stream(&mut self) -> LexerResult<bool> {
let tag = MetTag::from_tag(self.current_char());
if self.current_offset() == 0 && tag.is_tag() {
// A string of length 1 is just the tag itself: there is no name after it.
if self.string_to_parse.len() == 1 {
let error_pos = self.pos_in_string_to_parse();
return Err(LexerError::empty_stream_name(error_pos..error_pos));
return Err(LexerError::empty_tagged_name(error_pos..error_pos));
}
self.state.met_tag = stream_tag;
self.state.met_tag = tag;
return Ok(true);
}
Ok(false)
}
fn try_parse_as_canon(&mut self) -> LexerResult<bool> {
let tag = self.state.met_tag.deduce_tag(self.current_char());
if self.current_offset() == 1 && tag.is_canon_stream() {
if self.string_to_parse.len() == 2 && tag.is_tag() {
let error_pos = self.pos_in_string_to_parse();
return Err(LexerError::empty_canon_name(error_pos..error_pos));
}
self.state.met_tag = tag;
return Ok(true);
}
@ -238,6 +258,9 @@ impl<'input> CallVariableParser<'input> {
return Err(LexerError::leading_dot(
self.start_pos..self.pos_in_string_to_parse(),
));
} else if self.state.met_tag.is_tag() && self.current_offset() <= 2 {
let prev_pos = self.pos_in_string_to_parse() - 1;
return Err(LexerError::empty_canon_name(prev_pos..prev_pos));
}
self.state.first_dot_met_pos = Some(self.current_offset());
return Ok(true);
@ -288,7 +311,7 @@ impl<'input> CallVariableParser<'input> {
name,
position: self.start_pos,
},
MetTag::CanonStream => Token::CanonStream {
MetTag::CanonStream | MetTag::Canon => Token::CanonStream {
name,
position: self.start_pos,
},
@ -311,7 +334,7 @@ impl<'input> CallVariableParser<'input> {
lambda,
position: self.start_pos,
},
MetTag::CanonStream => Token::CanonStreamWithLambda {
MetTag::CanonStream | MetTag::Canon => Token::CanonStreamWithLambda {
name,
lambda,
position: self.start_pos,
@ -383,16 +406,35 @@ impl<'input> CallVariableParser<'input> {
}
}
/// At the moment there are two kinds of tags: a first-level tag and a canon tag.
/// A first-level tag is the first character of a variable name, e.g. the `$` in `$stream`.
/// The canon tag is currently the only second-level tag.
/// It is the second character of a variable name, e.g. the `$` in `#$canon_stream`.
impl MetTag {
/// Maps a tag character to the corresponding `MetTag`.
///
/// Any character that is not one of `$`, `#` or `%` yields `Self::None`.
fn from_tag(tag: char) -> Self {
match tag {
'$' => Self::Stream,
'#' => Self::CanonStream,
// `#` alone is only `Canon`; `deduce_tag` yields `CanonStream` when `$` follows.
'#' => Self::Canon,
'%' => Self::StreamMap,
_ => Self::None,
}
}
/// Refines the already recorded tag using the next character.
///
/// A `$` seen while the recorded tag is `Canon` produces `CanonStream`;
/// in every other case the recorded tag is returned unchanged.
fn deduce_tag(&self, tag: char) -> Self {
    if tag == '$' && self.is_canon() {
        Self::CanonStream
    } else {
        // `MetTag` is `Copy`, so a plain dereference hands back the same value.
        *self
    }
}
/// Returns `true` only for `Self::Canon`, i.e. a `#` that has not been followed by `$`.
fn is_canon(&self) -> bool {
matches!(self, Self::Canon)
}
/// Returns `true` only for `Self::CanonStream`.
fn is_canon_stream(&self) -> bool {
matches!(self, Self::CanonStream)
}
/// Returns `true` for every variant except `Self::None`.
fn is_tag(&self) -> bool {
!matches!(self, Self::None)
}

View File

@ -34,8 +34,11 @@ pub enum LexerError {
#[error("only alphanumeric, '_', and '-' characters are allowed in this position")]
IsNotAlphanumeric(Span),
#[error("a stream name should be non empty")]
EmptyStreamName(Span),
#[error("a tagged name should be non empty")]
EmptyTaggedName(Span),
#[error("a canon name should be non empty")]
EmptyCanonName(Span),
#[error("this variable or constant shouldn't have empty name")]
EmptyVariableOrConst(Span),
@ -75,7 +78,8 @@ impl LexerError {
Self::UnclosedQuote(span) => span,
Self::EmptyString(span) => span,
Self::IsNotAlphanumeric(span) => span,
Self::EmptyStreamName(span) => span,
Self::EmptyTaggedName(span) => span,
Self::EmptyCanonName(span) => span,
Self::EmptyVariableOrConst(span) => span,
Self::InvalidLambda(span) => span,
Self::UnallowedCharInNumber(span) => span,
@ -102,8 +106,12 @@ impl LexerError {
Self::IsNotAlphanumeric(range.into())
}
/// Builds the "tagged name should be non empty" error, converting `range` into the stored span.
pub fn empty_stream_name(range: Range<AirPos>) -> Self {
Self::EmptyStreamName(range.into())
pub fn empty_tagged_name(range: Range<AirPos>) -> Self {
Self::EmptyTaggedName(range.into())
}
/// Builds the "canon name should be non empty" error, converting `range` into the stored span.
pub fn empty_canon_name(range: Range<AirPos>) -> Self {
Self::EmptyCanonName(range.into())
}
pub fn empty_variable_or_const(range: Range<AirPos>) -> Self {

View File

@ -215,37 +215,66 @@ fn stream_map() {
// Lexer expectations for canon stream names without a lambda.
#[test]
fn canon_stream() {
const CANON_STREAM: &str = "#stream____asdasd";
// Both the `#name` and the `#$name` spelling are expected to produce a single
// `CanonStream` token whose name is the full input, tags included.
for canon_stream_name in vec!["#stream____asdasd", "#$stream____asdasd"] {
lexer_test(
canon_stream_name,
Single(Ok((
0.into(),
Token::CanonStream {
name: canon_stream_name,
position: 0.into(),
},
canon_stream_name.len().into(),
))),
);
}
// A `$` at offset 2 (not directly after `#`) is expected to be rejected
// as a non-alphanumeric character at that position.
let cannon_stream_name = "#s$stream____asdasd";
lexer_test(
CANON_STREAM,
Single(Ok((
0.into(),
Token::CanonStream {
name: CANON_STREAM,
position: 0.into(),
},
CANON_STREAM.len().into(),
))),
cannon_stream_name,
Single(Err(LexerError::is_not_alphanumeric(2.into()..2.into()))),
);
// A lone `#` has no name after the tag.
let cannon_stream_name = "#";
lexer_test(
cannon_stream_name,
Single(Err(LexerError::empty_tagged_name(0.into()..0.into()))),
);
}
// Lexer expectations for canon stream names followed by the `.length` functor.
#[test]
fn canon_stream_with_functor() {
let canon_stream_name = "#canon_stream";
let canon_stream_with_functor: String = format!("{canon_stream_name}.length");
// Both the `#name` and the `#$name` spelling are expected to produce a
// `CanonStreamWithLambda` token carrying the `Length` functor.
for canon_stream_name in vec!["#canon_stream", "#$canon_stream"] {
let canon_stream_with_functor: String = format!("{canon_stream_name}.length");
lexer_test(
&canon_stream_with_functor,
Single(Ok((
0.into(),
Token::CanonStreamWithLambda {
name: canon_stream_name,
lambda: LambdaAST::Functor(Functor::Length),
position: 0.into(),
},
canon_stream_with_functor.len().into(),
))),
);
}
// A `$` at offset 2 (not directly after `#`) is expected to be rejected
// as a non-alphanumeric character at that position.
let cannon_stream_name = "#s$stream____asdasd.length";
lexer_test(
&canon_stream_with_functor,
Single(Ok((
0.into(),
Token::CanonStreamWithLambda {
name: canon_stream_name,
lambda: LambdaAST::Functor(Functor::Length),
position: 0.into(),
},
canon_stream_with_functor.len().into(),
))),
cannon_stream_name,
Single(Err(LexerError::is_not_alphanumeric(2.into()..2.into()))),
);
// A dot right after `#` means the name is empty; the error is expected at offset 0.
let cannon_stream_name = "#.length";
lexer_test(
cannon_stream_name,
Single(Err(LexerError::empty_canon_name(0.into()..0.into()))),
);
// A dot right after `#$` means the name is empty; the error is expected at offset 1.
let cannon_stream_name = "#$.length";
lexer_test(
cannon_stream_name,
Single(Err(LexerError::empty_canon_name(1.into()..1.into()))),
);
}