diff options
| -rw-r--r-- | src/parse/regex/enfa.rs | 34 | ||||
| -rw-r--r-- | src/parse/regex/mod.rs | 53 | ||||
| -rw-r--r-- | src/run/builtin.rs | 11 | ||||
| -rw-r--r-- | src/run/mod.rs | 5 |
4 files changed, 85 insertions, 18 deletions
diff --git a/src/parse/regex/enfa.rs b/src/parse/regex/enfa.rs index 3809595..b3d3c4a 100644 --- a/src/parse/regex/enfa.rs +++ b/src/parse/regex/enfa.rs @@ -328,10 +328,17 @@ impl EState { } } -impl From<Pattern> for ENFA { - fn from(value: Pattern) -> Self { - match value { - Pattern::Byte(c) => Self::from(Pattern::Range(c, c)), +#[derive(Debug)] +pub enum EnfaTranslationError { + AssertionsNotSupported, +} + +impl TryFrom<Pattern> for ENFA { + type Error = EnfaTranslationError; + + fn try_from(value: Pattern) -> Result<Self, Self::Error> { + Ok(match value { + Pattern::Byte(c) => Self::try_from(Pattern::Range(c, c))?, Pattern::Range(c1, c2) => Self { states: vec![ EState { @@ -343,7 +350,10 @@ impl From<Pattern> for ENFA { ], }, Pattern::Alt(alts) => { - let nfas: Vec<ENFA> = alts.into_iter().map(ENFA::from).collect(); + let nfas: Vec<ENFA> = alts + .into_iter() + .map(Self::try_from) + .collect::<Result<_, _>>()?; let mut states = vec![EState::start()]; let mut ends = vec![]; for nfa in nfas.into_iter() { @@ -360,18 +370,21 @@ impl From<Pattern> for ENFA { Self { states } } Pattern::Concat(seq) => { - let nfas: Vec<Self> = seq.into_iter().map(ENFA::from).collect(); + let nfas: Vec<Self> = seq + .into_iter() + .map(Self::try_from) + .collect::<Result<_, _>>()?; Self::concat(nfas) } Pattern::Rep(regex, min, None) => { - let nfa = ENFA::from(*regex); + let nfa = ENFA::try_from(*regex)?; let base = nfa.clone().repeat(min as usize); let tail = nfa.looping(); Self::concat(vec![base, tail]) } Pattern::Rep(regex, min, Some(max)) => { assert!(min < max); - let nfa = Self::from(*regex); + let nfa = Self::try_from(*regex)?; let base = nfa.clone().repeat(min as usize); let tail = nfa.optx((max - min) as usize); Self::concat(vec![base, tail]) @@ -379,6 +392,9 @@ impl From<Pattern> for ENFA { Pattern::Nothing => Self { states: vec![EState::terminal()], }, - } + Pattern::Assertion(..) => { + return Err(EnfaTranslationError::AssertionsNotSupported); + } + }) } } diff --git a/src/parse/regex/mod.rs b/src/parse/regex/mod.rs index 51527ae..10d332e 100644 --- a/src/parse/regex/mod.rs +++ b/src/parse/regex/mod.rs @@ -6,6 +6,18 @@ mod byte_range; mod dfa; mod enfa; +#[derive(PartialEq, Eq, Debug, Clone, Copy)] +pub enum LookDirection { + Ahead, + Behind, +} + +#[derive(PartialEq, Eq, Debug, Clone, Copy)] +pub enum LookPolarity { + Positive, + Negative, +} + #[derive(PartialEq, Debug, Clone)] pub enum Pattern { Byte(u8), @@ -13,6 +25,7 @@ pub enum Pattern { Alt(Vec<Pattern>), Concat(Vec<Pattern>), Rep(Box<Pattern>, u32, Option<u32>), + Assertion(LookDirection, LookPolarity, Box<Pattern>), Nothing, } @@ -166,6 +179,22 @@ fn parse_atom(s: &mut super::Cursor<'_>) -> Result<Pattern> { } b'(' => { s.adv(); + + let mut assertion = None; + if s.buf.starts_with(b"?=") { + s.advance(2); + assertion = Some((LookDirection::Ahead, LookPolarity::Positive)); + } else if s.buf.starts_with(b"?!") { + s.advance(2); + assertion = Some((LookDirection::Ahead, LookPolarity::Negative)); + } else if s.buf.starts_with(b"?<=") { + s.advance(3); + assertion = Some((LookDirection::Behind, LookPolarity::Positive)); + } else if s.buf.starts_with(b"?<!") { + s.advance(3); + assertion = Some((LookDirection::Behind, LookPolarity::Negative)); + } + s.highlight_from(begin, OtherHighlights::RegexSymbol); let inner = parse0(s)?; if !s.has() { @@ -176,7 +205,12 @@ fn parse_atom(s: &mut super::Cursor<'_>) -> Result<Pattern> { return Err(ParseError::Expected(')')); } s.highlight_from(begin, OtherHighlights::RegexSymbol); - Ok(inner) + + if let Some((dir, pol)) = assertion { + Ok(Pattern::Assertion(dir, pol, Box::new(inner))) + } else { + Ok(inner) + } } b'.' => { s.adv(); @@ -217,11 +251,22 @@ impl std::fmt::Debug for CompiledPattern { } } +#[derive(Debug)] +pub enum CompilationError { + Enfa(enfa::EnfaTranslationError), +} + +impl From<enfa::EnfaTranslationError> for CompilationError { + fn from(value: enfa::EnfaTranslationError) -> Self { + Self::Enfa(value) + } +} + impl Pattern { - pub fn compile(self) -> CompiledPattern { - let enfa = enfa::ENFA::from(self); + pub fn try_compile(self) -> std::result::Result<CompiledPattern, CompilationError> { + let enfa = enfa::ENFA::try_from(self)?; let dfa = dfa::DFA::from(enfa); - CompiledPattern { dfa } + Ok(CompiledPattern { dfa }) } } diff --git a/src/run/builtin.rs b/src/run/builtin.rs index f3682f7..fd4cbed 100644 --- a/src/run/builtin.rs +++ b/src/run/builtin.rs @@ -1116,13 +1116,18 @@ mod dbg { let regex = match crate::parse::regex::Pattern::parse_from_bytes(&args[0]) { Ok(r) => r, Err(e) => { - writeln!(stdout, "not a valid regex: {e:?}")?; + writeln!(stdout, "parse error: {e:?}")?; return Err(Error::Exit(1)); }, }; - let compiled = regex.compile(); - writeln!(stdout, "{compiled:?}")?; + match regex.try_compile() { + Ok(compiled) => writeln!(stdout, "{compiled:?}")?, + Err(e) => { + writeln!(stdout, "compilation error: {e:?}")?; + return Err(Error::Exit(2)); + }, + } Ok(()) } diff --git a/src/run/mod.rs b/src/run/mod.rs index 009954b..c730272 100644 --- a/src/run/mod.rs +++ b/src/run/mod.rs @@ -457,8 +457,9 @@ impl Executor { ) -> SpawnedCmd { for branch in c.branches.into_iter() { // TODO: do not compile every time - let compiled = branch.pattern.compile(); - if compiled.matches(&c.discriminant) { + if let Ok(compiled) = branch.pattern.try_compile() + && compiled.matches(&c.discriminant) + { return self.execute_block(branch.block, stdin, stdout); } } |
