From 1a138779293823177613238591768077d781de05 Mon Sep 17 00:00:00 2001 From: Jonas Maier Date: Tue, 2 Jun 2026 12:39:41 +0200 Subject: regex: parse lookahead and lookbehind --- src/parse/regex/mod.rs | 53 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 4 deletions(-) (limited to 'src/parse/regex/mod.rs') diff --git a/src/parse/regex/mod.rs b/src/parse/regex/mod.rs index 51527ae..10d332e 100644 --- a/src/parse/regex/mod.rs +++ b/src/parse/regex/mod.rs @@ -6,6 +6,18 @@ mod byte_range; mod dfa; mod enfa; +#[derive(PartialEq, Eq, Debug, Clone, Copy)] +pub enum LookDirection { + Ahead, + Behind, +} + +#[derive(PartialEq, Eq, Debug, Clone, Copy)] +pub enum LookPolarity { + Positive, + Negative, +} + #[derive(PartialEq, Debug, Clone)] pub enum Pattern { Byte(u8), @@ -13,6 +25,7 @@ pub enum Pattern { Alt(Vec), Concat(Vec), Rep(Box, u32, Option), + Assertion(LookDirection, LookPolarity, Box), Nothing, } @@ -166,6 +179,22 @@ fn parse_atom(s: &mut super::Cursor<'_>) -> Result { } b'(' => { s.adv(); + + let mut assertion = None; + if s.buf.starts_with(b"?=") { + s.advance(2); + assertion = Some((LookDirection::Ahead, LookPolarity::Positive)); + } else if s.buf.starts_with(b"?!") { + s.advance(2); + assertion = Some((LookDirection::Ahead, LookPolarity::Negative)); + } else if s.buf.starts_with(b"?<=") { + s.advance(3); + assertion = Some((LookDirection::Behind, LookPolarity::Positive)); + } else if s.buf.starts_with(b"?) -> Result { return Err(ParseError::Expected(')')); } s.highlight_from(begin, OtherHighlights::RegexSymbol); - Ok(inner) + + if let Some((dir, pol)) = assertion { + Ok(Pattern::Assertion(dir, pol, Box::new(inner))) + } else { + Ok(inner) + } } b'.' => { s.adv(); @@ -217,11 +251,22 @@ impl std::fmt::Debug for CompiledPattern { } } +#[derive(Debug)] +pub enum CompilationError { + Enfa(enfa::EnfaTranslationError), +} + +impl From for CompilationError { + fn from(value: enfa::EnfaTranslationError) -> Self { + Self::Enfa(value) + } +} + impl Pattern { - pub fn compile(self) -> CompiledPattern { - let enfa = enfa::ENFA::from(self); + pub fn try_compile(self) -> std::result::Result { + let enfa = enfa::ENFA::try_from(self)?; let dfa = dfa::DFA::from(enfa); - CompiledPattern { dfa } + Ok(CompiledPattern { dfa }) } } -- cgit v1.2.3