about summary refs log tree commit diff stats
path: root/src/parse/regex/mod.rs
diff options
context:
space:
mode:
author Jonas Maier <jonas@x77.dev> 2026-06-02 12:39:41 +0200
committer Jonas Maier <jonas@x77.dev> 2026-06-02 12:39:41 +0200
commit 1a138779293823177613238591768077d781de05 (patch)
tree 68323afdce3703e9fa933830ec196ba33fc51eaa /src/parse/regex/mod.rs
parent 9d1a342cea994a9d912f348deec5cdb2032b4189 (diff)
download pish-1a138779293823177613238591768077d781de05.tar.gz
regex: parse lookahead and lookbehind
Diffstat (limited to 'src/parse/regex/mod.rs')
-rw-r--r-- src/parse/regex/mod.rs 53
1 file changed, 49 insertions, 4 deletions
diff --git a/src/parse/regex/mod.rs b/src/parse/regex/mod.rs
index 51527ae..10d332e 100644
--- a/src/parse/regex/mod.rs
+++ b/src/parse/regex/mod.rs
@@ -6,6 +6,18 @@ mod byte_range;
mod dfa;
mod enfa;
+#[derive(PartialEq, Eq, Debug, Clone, Copy)]
+pub enum LookDirection {
+ Ahead,
+ Behind,
+}
+
+#[derive(PartialEq, Eq, Debug, Clone, Copy)]
+pub enum LookPolarity {
+ Positive,
+ Negative,
+}
+
#[derive(PartialEq, Debug, Clone)]
pub enum Pattern {
Byte(u8),
@@ -13,6 +25,7 @@ pub enum Pattern {
Alt(Vec<Pattern>),
Concat(Vec<Pattern>),
Rep(Box<Pattern>, u32, Option<u32>),
+ Assertion(LookDirection, LookPolarity, Box<Pattern>),
Nothing,
}
@@ -166,6 +179,22 @@ fn parse_atom(s: &mut super::Cursor<'_>) -> Result<Pattern> {
}
b'(' => {
s.adv();
+
+ let mut assertion = None;
+ if s.buf.starts_with(b"?=") {
+ s.advance(2);
+ assertion = Some((LookDirection::Ahead, LookPolarity::Positive));
+ } else if s.buf.starts_with(b"?!") {
+ s.advance(2);
+ assertion = Some((LookDirection::Ahead, LookPolarity::Negative));
+ } else if s.buf.starts_with(b"?<=") {
+ s.advance(3);
+ assertion = Some((LookDirection::Behind, LookPolarity::Positive));
+ } else if s.buf.starts_with(b"?<!") {
+ s.advance(3);
+ assertion = Some((LookDirection::Behind, LookPolarity::Negative));
+ }
+
s.highlight_from(begin, OtherHighlights::RegexSymbol);
let inner = parse0(s)?;
if !s.has() {
@@ -176,7 +205,12 @@ fn parse_atom(s: &mut super::Cursor<'_>) -> Result<Pattern> {
return Err(ParseError::Expected(')'));
}
s.highlight_from(begin, OtherHighlights::RegexSymbol);
- Ok(inner)
+
+ if let Some((dir, pol)) = assertion {
+ Ok(Pattern::Assertion(dir, pol, Box::new(inner)))
+ } else {
+ Ok(inner)
+ }
}
b'.' => {
s.adv();
@@ -217,11 +251,22 @@ impl std::fmt::Debug for CompiledPattern {
}
}
+#[derive(Debug)]
+pub enum CompilationError {
+ Enfa(enfa::EnfaTranslationError),
+}
+
+impl From<enfa::EnfaTranslationError> for CompilationError {
+ fn from(value: enfa::EnfaTranslationError) -> Self {
+ Self::Enfa(value)
+ }
+}
+
impl Pattern {
- pub fn compile(self) -> CompiledPattern {
- let enfa = enfa::ENFA::from(self);
+ pub fn try_compile(self) -> std::result::Result<CompiledPattern, CompilationError> {
+ let enfa = enfa::ENFA::try_from(self)?;
let dfa = dfa::DFA::from(enfa);
- CompiledPattern { dfa }
+ Ok(CompiledPattern { dfa })
}
}