feat(air-parser): canon stream syntax (#618)

feat(air-parser): improved canon stream syntax support [fixes VM-293]
This commit is contained in:
raftedproc 2023-07-25 12:38:10 +03:00 committed by GitHub
parent 6bd60d48e7
commit 8871465324
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 116 additions and 37 deletions

View File

@ -30,11 +30,12 @@ pub(super) fn try_parse_call_variable(
CallVariableParser::try_parse(string_to_parse, start_pos) CallVariableParser::try_parse(string_to_parse, start_pos)
} }
#[derive(Debug)] #[derive(Debug, Clone, Copy)]
enum MetTag { enum MetTag {
None, None,
Stream, Stream,
StreamMap, StreamMap,
Canon,
CanonStream, CanonStream,
} }
@ -175,7 +176,10 @@ impl<'input> CallVariableParser<'input> {
} }
fn try_parse_as_variable(&mut self) -> LexerResult<()> { fn try_parse_as_variable(&mut self) -> LexerResult<()> {
if self.try_parse_as_stream_start()? || self.try_parse_as_json_path_start()? { if self.try_parse_as_canon()?
|| self.try_parse_as_stream()?
|| self.try_parse_as_json_path_start()?
{
return Ok(()); return Ok(());
} else if self.is_json_path_started() { } else if self.is_json_path_started() {
self.try_parse_as_json_path()?; self.try_parse_as_json_path()?;
@ -186,15 +190,31 @@ impl<'input> CallVariableParser<'input> {
Ok(()) Ok(())
} }
fn try_parse_as_stream_start(&mut self) -> LexerResult<bool> { fn try_parse_as_stream(&mut self) -> LexerResult<bool> {
let stream_tag = MetTag::from_tag(self.current_char()); let tag = MetTag::from_tag(self.current_char());
if self.current_offset() == 0 && stream_tag.is_tag() { if self.current_offset() == 0 && tag.is_tag() {
if self.string_to_parse.len() == 1 { if self.string_to_parse.len() == 1 {
let error_pos = self.pos_in_string_to_parse(); let error_pos = self.pos_in_string_to_parse();
return Err(LexerError::empty_stream_name(error_pos..error_pos)); return Err(LexerError::empty_tagged_name(error_pos..error_pos));
} }
self.state.met_tag = stream_tag; self.state.met_tag = tag;
return Ok(true);
}
Ok(false)
}
fn try_parse_as_canon(&mut self) -> LexerResult<bool> {
let tag = self.state.met_tag.deduce_tag(self.current_char());
if self.current_offset() == 1 && tag.is_canon_stream() {
if self.string_to_parse.len() == 2 && tag.is_tag() {
let error_pos = self.pos_in_string_to_parse();
return Err(LexerError::empty_canon_name(error_pos..error_pos));
}
self.state.met_tag = tag;
return Ok(true); return Ok(true);
} }
@ -238,6 +258,9 @@ impl<'input> CallVariableParser<'input> {
return Err(LexerError::leading_dot( return Err(LexerError::leading_dot(
self.start_pos..self.pos_in_string_to_parse(), self.start_pos..self.pos_in_string_to_parse(),
)); ));
} else if self.state.met_tag.is_tag() && self.current_offset() <= 2 {
let prev_pos = self.pos_in_string_to_parse() - 1;
return Err(LexerError::empty_canon_name(prev_pos..prev_pos));
} }
self.state.first_dot_met_pos = Some(self.current_offset()); self.state.first_dot_met_pos = Some(self.current_offset());
return Ok(true); return Ok(true);
@ -288,7 +311,7 @@ impl<'input> CallVariableParser<'input> {
name, name,
position: self.start_pos, position: self.start_pos,
}, },
MetTag::CanonStream => Token::CanonStream { MetTag::CanonStream | MetTag::Canon => Token::CanonStream {
name, name,
position: self.start_pos, position: self.start_pos,
}, },
@ -311,7 +334,7 @@ impl<'input> CallVariableParser<'input> {
lambda, lambda,
position: self.start_pos, position: self.start_pos,
}, },
MetTag::CanonStream => Token::CanonStreamWithLambda { MetTag::CanonStream | MetTag::Canon => Token::CanonStreamWithLambda {
name, name,
lambda, lambda,
position: self.start_pos, position: self.start_pos,
@ -383,16 +406,35 @@ impl<'input> CallVariableParser<'input> {
} }
} }
/// There are currently two kinds of tags: a plain tag and the canon tag.
/// A plain tag defines the first level and is the first character of a variable name, e.g. `$stream`.
/// The canon tag is currently the only tag that defines the second level.
/// It is the second character of a variable name, e.g. `#$canon_stream`.
impl MetTag { impl MetTag {
fn from_tag(tag: char) -> Self { fn from_tag(tag: char) -> Self {
match tag { match tag {
'$' => Self::Stream, '$' => Self::Stream,
'#' => Self::CanonStream, '#' => Self::Canon,
'%' => Self::StreamMap, '%' => Self::StreamMap,
_ => Self::None, _ => Self::None,
} }
} }
/// Refines the already-met tag using the next character of the variable name.
///
/// A `$` that follows the canon tag upgrades the tag to `CanonStream`;
/// in every other case the current tag is returned unchanged.
fn deduce_tag(&self, tag: char) -> Self {
    // Only the `Canon` -> `CanonStream` transition exists; `MetTag` is `Copy`,
    // so the unchanged tag is handed back by value.
    if tag == '$' && self.is_canon() {
        Self::CanonStream
    } else {
        *self
    }
}
/// Returns `true` only when this tag is the `Canon` variant.
fn is_canon(&self) -> bool {
matches!(self, Self::Canon)
}
/// Returns `true` only when this tag is the `CanonStream` variant.
fn is_canon_stream(&self) -> bool {
matches!(self, Self::CanonStream)
}
fn is_tag(&self) -> bool { fn is_tag(&self) -> bool {
!matches!(self, Self::None) !matches!(self, Self::None)
} }

View File

@ -34,8 +34,11 @@ pub enum LexerError {
#[error("only alphanumeric, '_', and '-' characters are allowed in this position")] #[error("only alphanumeric, '_', and '-' characters are allowed in this position")]
IsNotAlphanumeric(Span), IsNotAlphanumeric(Span),
#[error("a stream name should be non empty")] #[error("a tagged name should be non empty")]
EmptyStreamName(Span), EmptyTaggedName(Span),
#[error("a canon name should be non empty")]
EmptyCanonName(Span),
#[error("this variable or constant shouldn't have empty name")] #[error("this variable or constant shouldn't have empty name")]
EmptyVariableOrConst(Span), EmptyVariableOrConst(Span),
@ -75,7 +78,8 @@ impl LexerError {
Self::UnclosedQuote(span) => span, Self::UnclosedQuote(span) => span,
Self::EmptyString(span) => span, Self::EmptyString(span) => span,
Self::IsNotAlphanumeric(span) => span, Self::IsNotAlphanumeric(span) => span,
Self::EmptyStreamName(span) => span, Self::EmptyTaggedName(span) => span,
Self::EmptyCanonName(span) => span,
Self::EmptyVariableOrConst(span) => span, Self::EmptyVariableOrConst(span) => span,
Self::InvalidLambda(span) => span, Self::InvalidLambda(span) => span,
Self::UnallowedCharInNumber(span) => span, Self::UnallowedCharInNumber(span) => span,
@ -102,8 +106,12 @@ impl LexerError {
Self::IsNotAlphanumeric(range.into()) Self::IsNotAlphanumeric(range.into())
} }
pub fn empty_stream_name(range: Range<AirPos>) -> Self { pub fn empty_tagged_name(range: Range<AirPos>) -> Self {
Self::EmptyStreamName(range.into()) Self::EmptyTaggedName(range.into())
}
pub fn empty_canon_name(range: Range<AirPos>) -> Self {
Self::EmptyCanonName(range.into())
} }
pub fn empty_variable_or_const(range: Range<AirPos>) -> Self { pub fn empty_variable_or_const(range: Range<AirPos>) -> Self {

View File

@ -215,37 +215,66 @@ fn stream_map() {
#[test] #[test]
fn canon_stream() { fn canon_stream() {
const CANON_STREAM: &str = "#stream____asdasd"; for canon_stream_name in vec!["#stream____asdasd", "#$stream____asdasd"] {
lexer_test(
canon_stream_name,
Single(Ok((
0.into(),
Token::CanonStream {
name: canon_stream_name,
position: 0.into(),
},
canon_stream_name.len().into(),
))),
);
}
let cannon_stream_name = "#s$stream____asdasd";
lexer_test( lexer_test(
CANON_STREAM, cannon_stream_name,
Single(Ok(( Single(Err(LexerError::is_not_alphanumeric(2.into()..2.into()))),
0.into(), );
Token::CanonStream {
name: CANON_STREAM, let cannon_stream_name = "#";
position: 0.into(), lexer_test(
}, cannon_stream_name,
CANON_STREAM.len().into(), Single(Err(LexerError::empty_tagged_name(0.into()..0.into()))),
))),
); );
} }
#[test] #[test]
fn canon_stream_with_functor() { fn canon_stream_with_functor() {
let canon_stream_name = "#canon_stream"; for canon_stream_name in vec!["#canon_stream", "#$canon_stream"] {
let canon_stream_with_functor: String = format!("{canon_stream_name}.length"); let canon_stream_with_functor: String = format!("{canon_stream_name}.length");
lexer_test(
&canon_stream_with_functor,
Single(Ok((
0.into(),
Token::CanonStreamWithLambda {
name: canon_stream_name,
lambda: LambdaAST::Functor(Functor::Length),
position: 0.into(),
},
canon_stream_with_functor.len().into(),
))),
);
}
let cannon_stream_name = "#s$stream____asdasd.length";
lexer_test( lexer_test(
&canon_stream_with_functor, cannon_stream_name,
Single(Ok(( Single(Err(LexerError::is_not_alphanumeric(2.into()..2.into()))),
0.into(), );
Token::CanonStreamWithLambda { let cannon_stream_name = "#.length";
name: canon_stream_name, lexer_test(
lambda: LambdaAST::Functor(Functor::Length), cannon_stream_name,
position: 0.into(), Single(Err(LexerError::empty_canon_name(0.into()..0.into()))),
}, );
canon_stream_with_functor.len().into(), let cannon_stream_name = "#$.length";
))), lexer_test(
cannon_stream_name,
Single(Err(LexerError::empty_canon_name(1.into()..1.into()))),
); );
} }