diff options
| author | Jonas Maier <jonas@x77.dev> | 2026-06-05 15:03:44 +0200 |
|---|---|---|
| committer | Jonas Maier <jonas@x77.dev> | 2026-06-05 15:03:44 +0200 |
| commit | a464bec4d558533776329634504ab0c1bee84bba (patch) | |
| tree | ae19c073bf41ad52d97f951a1ffe10429d341199 /src/parse/regex/mod.rs | |
| parent | 2184c1cab66771594051197451c5e0f4dd2567a2 (diff) | |
| download | pish-a464bec4d558533776329634504ab0c1bee84bba.tar.gz | |
some bytecode implementation, does not work yet
Diffstat (limited to 'src/parse/regex/mod.rs')
| -rw-r--r-- | src/parse/regex/mod.rs | 104 |
1 files changed, 104 insertions, 0 deletions
diff --git a/src/parse/regex/mod.rs b/src/parse/regex/mod.rs index 63808cd..1bcf18c 100644 --- a/src/parse/regex/mod.rs +++ b/src/parse/regex/mod.rs @@ -5,6 +5,7 @@ use super::{Parse, ParseError, Result}; mod byte_range; pub mod dfa; pub mod enfa; +pub mod bc; #[derive(PartialEq, Eq, Debug, Clone, Copy)] pub enum LookDirection { @@ -12,6 +13,15 @@ pub enum LookDirection { Behind, } +impl LookDirection { + pub fn reverse(self) -> Self { + match self { + Self::Ahead => Self::Behind, + Self::Behind => Self::Ahead, + } + } +} + #[derive(PartialEq, Eq, Debug, Clone, Copy, Hash)] pub enum LookPolarity { Positive, @@ -29,6 +39,100 @@ pub enum Pattern { Nothing, } +#[derive(Debug, PartialEq, Eq, Copy, Clone)] +pub enum ByteConsumption { + Bounded(usize), + Unbounded, +} + +impl ByteConsumption { + pub fn zero() -> Self { + Self::Bounded(0) + } + pub fn one() -> Self { + Self::Bounded(1) + } +} + +impl Ord for ByteConsumption { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + match (self, other) { + (Self::Bounded(a), Self::Bounded(b)) => a.cmp(&b), + (Self::Bounded(_), Self::Unbounded) => std::cmp::Ordering::Less, + (Self::Unbounded, Self::Bounded(_)) => std::cmp::Ordering::Greater, + (Self::Unbounded, Self::Unbounded) => std::cmp::Ordering::Equal, + } + } +} +impl PartialOrd for ByteConsumption { + fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { + Some(self.cmp(other)) + } +} + +impl std::ops::Add for ByteConsumption { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + if let Self::Bounded(a) = self + && let Self::Bounded(b) = rhs + { + Self::Bounded(a + b) + } else { + Self::Unbounded + } + } +} + +impl std::ops::Mul<usize> for ByteConsumption { + type Output = Self; + + fn mul(self, rhs: usize) -> Self::Output { + match self { + Self::Bounded(x) => Self::Bounded(x * rhs), + Self::Unbounded => Self::Unbounded, + } + } +} + +impl std::iter::Sum for ByteConsumption { + fn sum<I: Iterator<Item = Self>>(iter: I) -> Self { + iter.fold(Self::zero(), |a, b| a + b) + } +} + +impl Pattern { + pub fn max_byte_consumption(&self) -> ByteConsumption { + match self { + Pattern::Byte(_) => ByteConsumption::one(), + Pattern::Range(_, _) => ByteConsumption::one(), + Pattern::Alt(patterns) => patterns + .iter() + .map(Self::max_byte_consumption) + .max() + .unwrap_or(ByteConsumption::zero()), + Pattern::Concat(patterns) => patterns.iter().map(Self::max_byte_consumption).sum(), + Pattern::Rep(pattern, _, Some(max_reps)) => { + pattern.max_byte_consumption() * (*max_reps as usize) + } + Pattern::Rep(_, _, None) => ByteConsumption::Unbounded, + Pattern::Assertion(_, _, _) => ByteConsumption::zero(), + Pattern::Nothing => ByteConsumption::zero(), + } + } + + pub fn reverse(self) -> Self { + use Pattern::*; + match self { + Byte(_) | Nothing | Range(..) => self, + Alt(patterns) => Alt(patterns.into_iter().map(Self::reverse).collect()), + Concat(patterns) => Concat(patterns.into_iter().map(Self::reverse).rev().collect()), + Rep(pattern, min, max) => Rep(Box::new(pattern.reverse()), min, max), + Assertion(dir, pol, pat) => Assertion(dir.reverse(), pol, Box::new(pat.reverse())), + } + } +} + impl Parse for Pattern { fn parse(b: &mut super::Cursor<'_>) -> super::Result<Self> { let begin = b.loc(); |
