From 1a138779293823177613238591768077d781de05 Mon Sep 17 00:00:00 2001
From: Jonas Maier
Date: Tue, 2 Jun 2026 12:39:41 +0200
Subject: regex: parse lookahead and lookbehind

---
 src/parse/regex/enfa.rs | 34 +++++++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 9 deletions(-)

(limited to 'src/parse/regex/enfa.rs')

diff --git a/src/parse/regex/enfa.rs b/src/parse/regex/enfa.rs
index 3809595..b3d3c4a 100644
--- a/src/parse/regex/enfa.rs
+++ b/src/parse/regex/enfa.rs
@@ -328,10 +328,17 @@ impl EState {
     }
 }
 
-impl From<Pattern> for ENFA {
-    fn from(value: Pattern) -> Self {
-        match value {
-            Pattern::Byte(c) => Self::from(Pattern::Range(c, c)),
+#[derive(Debug)]
+pub enum EnfaTranslationError {
+    AssertionsNotSupported,
+}
+
+impl TryFrom<Pattern> for ENFA {
+    type Error = EnfaTranslationError;
+
+    fn try_from(value: Pattern) -> Result<Self, Self::Error> {
+        Ok(match value {
+            Pattern::Byte(c) => Self::try_from(Pattern::Range(c, c))?,
             Pattern::Range(c1, c2) => Self {
                 states: vec![
                     EState {
@@ -343,7 +350,10 @@ impl From<Pattern> for ENFA {
                 ],
             },
             Pattern::Alt(alts) => {
-                let nfas: Vec<ENFA> = alts.into_iter().map(ENFA::from).collect();
+                let nfas: Vec<ENFA> = alts
+                    .into_iter()
+                    .map(Self::try_from)
+                    .collect::<Result<_, _>>()?;
                 let mut states = vec![EState::start()];
                 let mut ends = vec![];
                 for nfa in nfas.into_iter() {
@@ -360,18 +370,21 @@ impl From<Pattern> for ENFA {
                 Self { states }
             }
             Pattern::Concat(seq) => {
-                let nfas: Vec<ENFA> = seq.into_iter().map(ENFA::from).collect();
+                let nfas: Vec<ENFA> = seq
+                    .into_iter()
+                    .map(Self::try_from)
+                    .collect::<Result<_, _>>()?;
                 Self::concat(nfas)
             }
             Pattern::Rep(regex, min, None) => {
-                let nfa = ENFA::from(*regex);
+                let nfa = ENFA::try_from(*regex)?;
                 let base = nfa.clone().repeat(min as usize);
                 let tail = nfa.looping();
                 Self::concat(vec![base, tail])
             }
             Pattern::Rep(regex, min, Some(max)) => {
                 assert!(min < max);
-                let nfa = Self::from(*regex);
+                let nfa = Self::try_from(*regex)?;
                 let base = nfa.clone().repeat(min as usize);
                 let tail = nfa.optx((max - min) as usize);
                 Self::concat(vec![base, tail])
@@ -379,6 +392,9 @@ impl From<Pattern> for ENFA {
             Pattern::Nothing => Self {
                 states: vec![EState::terminal()],
             },
-        }
+            Pattern::Assertion(..) => {
+                return Err(EnfaTranslationError::AssertionsNotSupported);
+            }
+        })
     }
 }
-- 
cgit v1.2.3