about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/parse/regex/enfa.rs | 34
-rw-r--r-- src/parse/regex/mod.rs | 53
-rw-r--r-- src/run/builtin.rs | 11
-rw-r--r-- src/run/mod.rs | 5
4 files changed, 85 insertions, 18 deletions
diff --git a/src/parse/regex/enfa.rs b/src/parse/regex/enfa.rs
index 3809595..b3d3c4a 100644
--- a/src/parse/regex/enfa.rs
+++ b/src/parse/regex/enfa.rs
@@ -328,10 +328,17 @@ impl EState {
}
}
-impl From<Pattern> for ENFA {
- fn from(value: Pattern) -> Self {
- match value {
- Pattern::Byte(c) => Self::from(Pattern::Range(c, c)),
+#[derive(Debug)]
+pub enum EnfaTranslationError {
+ AssertionsNotSupported,
+}
+
+impl TryFrom<Pattern> for ENFA {
+ type Error = EnfaTranslationError;
+
+ fn try_from(value: Pattern) -> Result<Self, Self::Error> {
+ Ok(match value {
+ Pattern::Byte(c) => Self::try_from(Pattern::Range(c, c))?,
Pattern::Range(c1, c2) => Self {
states: vec![
EState {
@@ -343,7 +350,10 @@ impl From<Pattern> for ENFA {
],
},
Pattern::Alt(alts) => {
- let nfas: Vec<ENFA> = alts.into_iter().map(ENFA::from).collect();
+ let nfas: Vec<ENFA> = alts
+ .into_iter()
+ .map(Self::try_from)
+ .collect::<Result<_, _>>()?;
let mut states = vec![EState::start()];
let mut ends = vec![];
for nfa in nfas.into_iter() {
@@ -360,18 +370,21 @@ impl From<Pattern> for ENFA {
Self { states }
}
Pattern::Concat(seq) => {
- let nfas: Vec<Self> = seq.into_iter().map(ENFA::from).collect();
+ let nfas: Vec<Self> = seq
+ .into_iter()
+ .map(Self::try_from)
+ .collect::<Result<_, _>>()?;
Self::concat(nfas)
}
Pattern::Rep(regex, min, None) => {
- let nfa = ENFA::from(*regex);
+ let nfa = ENFA::try_from(*regex)?;
let base = nfa.clone().repeat(min as usize);
let tail = nfa.looping();
Self::concat(vec![base, tail])
}
Pattern::Rep(regex, min, Some(max)) => {
assert!(min < max);
- let nfa = Self::from(*regex);
+ let nfa = Self::try_from(*regex)?;
let base = nfa.clone().repeat(min as usize);
let tail = nfa.optx((max - min) as usize);
Self::concat(vec![base, tail])
@@ -379,6 +392,9 @@ impl From<Pattern> for ENFA {
Pattern::Nothing => Self {
states: vec![EState::terminal()],
},
- }
+ Pattern::Assertion(..) => {
+ return Err(EnfaTranslationError::AssertionsNotSupported);
+ }
+ })
}
}
diff --git a/src/parse/regex/mod.rs b/src/parse/regex/mod.rs
index 51527ae..10d332e 100644
--- a/src/parse/regex/mod.rs
+++ b/src/parse/regex/mod.rs
@@ -6,6 +6,18 @@ mod byte_range;
mod dfa;
mod enfa;
+#[derive(PartialEq, Eq, Debug, Clone, Copy)]
+pub enum LookDirection {
+ Ahead,
+ Behind,
+}
+
+#[derive(PartialEq, Eq, Debug, Clone, Copy)]
+pub enum LookPolarity {
+ Positive,
+ Negative,
+}
+
#[derive(PartialEq, Debug, Clone)]
pub enum Pattern {
Byte(u8),
@@ -13,6 +25,7 @@ pub enum Pattern {
Alt(Vec<Pattern>),
Concat(Vec<Pattern>),
Rep(Box<Pattern>, u32, Option<u32>),
+ Assertion(LookDirection, LookPolarity, Box<Pattern>),
Nothing,
}
@@ -166,6 +179,22 @@ fn parse_atom(s: &mut super::Cursor<'_>) -> Result<Pattern> {
}
b'(' => {
s.adv();
+
+ let mut assertion = None;
+ if s.buf.starts_with(b"?=") {
+ s.advance(2);
+ assertion = Some((LookDirection::Ahead, LookPolarity::Positive));
+ } else if s.buf.starts_with(b"?!") {
+ s.advance(2);
+ assertion = Some((LookDirection::Ahead, LookPolarity::Negative));
+ } else if s.buf.starts_with(b"?<=") {
+ s.advance(3);
+ assertion = Some((LookDirection::Behind, LookPolarity::Positive));
+ } else if s.buf.starts_with(b"?<!") {
+ s.advance(3);
+ assertion = Some((LookDirection::Behind, LookPolarity::Negative));
+ }
+
s.highlight_from(begin, OtherHighlights::RegexSymbol);
let inner = parse0(s)?;
if !s.has() {
@@ -176,7 +205,12 @@ fn parse_atom(s: &mut super::Cursor<'_>) -> Result<Pattern> {
return Err(ParseError::Expected(')'));
}
s.highlight_from(begin, OtherHighlights::RegexSymbol);
- Ok(inner)
+
+ if let Some((dir, pol)) = assertion {
+ Ok(Pattern::Assertion(dir, pol, Box::new(inner)))
+ } else {
+ Ok(inner)
+ }
}
b'.' => {
s.adv();
@@ -217,11 +251,22 @@ impl std::fmt::Debug for CompiledPattern {
}
}
+#[derive(Debug)]
+pub enum CompilationError {
+ Enfa(enfa::EnfaTranslationError),
+}
+
+impl From<enfa::EnfaTranslationError> for CompilationError {
+ fn from(value: enfa::EnfaTranslationError) -> Self {
+ Self::Enfa(value)
+ }
+}
+
impl Pattern {
- pub fn compile(self) -> CompiledPattern {
- let enfa = enfa::ENFA::from(self);
+ pub fn try_compile(self) -> std::result::Result<CompiledPattern, CompilationError> {
+ let enfa = enfa::ENFA::try_from(self)?;
let dfa = dfa::DFA::from(enfa);
- CompiledPattern { dfa }
+ Ok(CompiledPattern { dfa })
}
}
diff --git a/src/run/builtin.rs b/src/run/builtin.rs
index f3682f7..fd4cbed 100644
--- a/src/run/builtin.rs
+++ b/src/run/builtin.rs
@@ -1116,13 +1116,18 @@ mod dbg {
let regex = match crate::parse::regex::Pattern::parse_from_bytes(&args[0]) {
Ok(r) => r,
Err(e) => {
- writeln!(stdout, "not a valid regex: {e:?}")?;
+ writeln!(stdout, "parse error: {e:?}")?;
return Err(Error::Exit(1));
},
};
- let compiled = regex.compile();
- writeln!(stdout, "{compiled:?}")?;
+ match regex.try_compile() {
+ Ok(compiled) => writeln!(stdout, "{compiled:?}")?,
+ Err(e) => {
+ writeln!(stdout, "compilation error: {e:?}")?;
+ return Err(Error::Exit(2));
+ },
+ }
Ok(())
}
diff --git a/src/run/mod.rs b/src/run/mod.rs
index 009954b..c730272 100644
--- a/src/run/mod.rs
+++ b/src/run/mod.rs
@@ -457,8 +457,9 @@ impl Executor {
) -> SpawnedCmd {
for branch in c.branches.into_iter() {
// TODO: do not compile every time
- let compiled = branch.pattern.compile();
- if compiled.matches(&c.discriminant) {
+ if let Ok(compiled) = branch.pattern.try_compile()
+ && compiled.matches(&c.discriminant)
+ {
return self.execute_block(branch.block, stdin, stdout);
}
}