From b39823a4f9d13a5d313ca665e06884b587bceb9a Mon Sep 17 00:00:00 2001
From: Jonas Maier
Date: Wed, 3 Jun 2026 20:17:25 +0200
Subject: regex: explicitly return an error for unsupported non-greedy repetitions

---
 src/parse/mod.rs       |  7 +++++++
 src/parse/regex/mod.rs | 39 ++++++++++++++++++++-------------------
 2 files changed, 27 insertions(+), 19 deletions(-)

(limited to 'src')

diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index f61baf9..10ea979 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -1186,6 +1186,13 @@ pub enum ParseError {
     NotAWhile,
 
     ExpectedKeyword(Keyword),
+
+    NotYetImplemented(NotImplementedKind),
+}
+
+#[derive(Debug, PartialEq)]
+pub enum NotImplementedKind {
+    NonGreedyRegexRepetition,
 }
 
 type Result = std::result::Result;
diff --git a/src/parse/regex/mod.rs b/src/parse/regex/mod.rs
index f35d3f2..72e11da 100644
--- a/src/parse/regex/mod.rs
+++ b/src/parse/regex/mod.rs
@@ -1,4 +1,4 @@
-use crate::parse::OtherHighlights;
+use crate::parse::{NotImplementedKind, OtherHighlights};
 
 use super::{Parse, ParseError, Result};
 
@@ -98,26 +98,27 @@ fn parse_rep(s: &mut super::Cursor<'_>) -> Result {
 
     let begin = s.loc();
 
-    match s.peek() {
-        b'*' => {
-            s.adv();
-            s.highlight_from(begin, OtherHighlights::RegexSymbol);
-            Ok(Pattern::Rep(Box::new(atom), 0, None))
-        }
-        b'+' => {
-            s.adv();
-            s.highlight_from(begin, OtherHighlights::RegexSymbol);
-            Ok(Pattern::Rep(Box::new(atom), 1, None))
-        }
-        b'?' => {
-            s.adv();
-            s.highlight_from(begin, OtherHighlights::RegexSymbol);
-            Ok(Pattern::Rep(Box::new(atom), 0, Some(1)))
+    let rep = match s.peek() {
+        b'*' => Some((0, None)),
+        b'+' => Some((1, None)),
+        b'?' => Some((0, Some(1))),
+        _ => None,
+    };
+
+    if let Some((min_rep, max_rep)) = rep {
+        s.adv();
+        s.highlight_from(begin, OtherHighlights::RegexSymbol);
+
+        if s.has() && s.peek() == b'?' {
+            return Err(ParseError::NotYetImplemented(
+                NotImplementedKind::NonGreedyRegexRepetition,
+            ));
         }
-        _ => Ok(atom),
-    }
 
-    // TODO: non-greedy
+        Ok(Pattern::Rep(Box::new(atom), min_rep, max_rep))
+    } else {
+        Ok(atom)
+    }
 }
 
 const SYMBOLS: &[u8] = b"{}[]()*+-?|.\\ ";
-- 
cgit v1.2.3