about | summary | refs | log | tree | commit | diff | stats
path: root/src/parse/regex/mod.rs
diff options
context:
space:
mode:
author Jonas Maier <jonas@x77.dev> 2026-06-05 21:22:44 +0200
committer Jonas Maier <jonas@x77.dev> 2026-06-05 21:22:44 +0200
commit 05f3f381d0066b2e6116470f4e6251ae191aaefe (patch)
tree 063d31bdc10e25a9e97d15cdaddb625a9fcd199f /src/parse/regex/mod.rs
parent 65013cb7441dcd56d362613e1ebc469756613b24 (diff)
download pish-05f3f381d0066b2e6116470f4e6251ae191aaefe.tar.gz
regex compiler compiles
Diffstat (limited to 'src/parse/regex/mod.rs')
-rw-r--r-- src/parse/regex/mod.rs 52
1 files changed, 43 insertions, 9 deletions
diff --git a/src/parse/regex/mod.rs b/src/parse/regex/mod.rs
index 1bcf18c..79e234f 100644
--- a/src/parse/regex/mod.rs
+++ b/src/parse/regex/mod.rs
@@ -2,12 +2,12 @@ use crate::parse::{NotImplementedKind, OtherHighlights};
use super::{Parse, ParseError, Result};
+pub mod bc;
mod byte_range;
pub mod dfa;
pub mod enfa;
-pub mod bc;
-#[derive(PartialEq, Eq, Debug, Clone, Copy)]
+#[derive(PartialEq, Eq, Debug, Clone, Copy, Hash)]
pub enum LookDirection {
Ahead,
Behind,
@@ -28,13 +28,35 @@ pub enum LookPolarity {
Negative,
}
-#[derive(PartialEq, Debug, Clone)]
+#[derive(PartialEq, Eq, Debug, Clone, Copy, Hash)]
+pub enum CharacterClass {
+ Everything,
+ Nothing,
+ Whitespace,
+ Alphabetic,
+ Alphanumeric,
+}
+
+impl CharacterClass {
+ pub fn matches(self, byte: u8) -> bool {
+ match self {
+ CharacterClass::Everything => true,
+ CharacterClass::Nothing => false,
+ CharacterClass::Whitespace => byte.is_ascii_whitespace(),
+ CharacterClass::Alphabetic => byte.is_ascii_alphabetic(),
+ CharacterClass::Alphanumeric => byte.is_ascii_alphanumeric(),
+ }
+ }
+}
+
+#[derive(PartialEq, Eq, Hash, Debug, Clone)]
pub enum Pattern {
Byte(u8),
Range(u8, u8),
+ CharacterClass(CharacterClass),
Alt(Vec<Pattern>),
Concat(Vec<Pattern>),
- Rep(Box<Pattern>, u32, Option<u32>),
+ Rep(Box<Pattern>, u32, Option<u32>, GreedyBehavior),
Assertion(LookDirection, LookPolarity, Box<Pattern>),
Nothing,
}
@@ -106,16 +128,17 @@ impl Pattern {
match self {
Pattern::Byte(_) => ByteConsumption::one(),
Pattern::Range(_, _) => ByteConsumption::one(),
+ Pattern::CharacterClass(_) => ByteConsumption::one(),
Pattern::Alt(patterns) => patterns
.iter()
.map(Self::max_byte_consumption)
.max()
.unwrap_or(ByteConsumption::zero()),
Pattern::Concat(patterns) => patterns.iter().map(Self::max_byte_consumption).sum(),
- Pattern::Rep(pattern, _, Some(max_reps)) => {
+ Pattern::Rep(pattern, _, Some(max_reps), _) => {
pattern.max_byte_consumption() * (*max_reps as usize)
}
- Pattern::Rep(_, _, None) => ByteConsumption::Unbounded,
+ Pattern::Rep(_, _, None, _) => ByteConsumption::Unbounded,
Pattern::Assertion(_, _, _) => ByteConsumption::zero(),
Pattern::Nothing => ByteConsumption::zero(),
}
@@ -124,10 +147,10 @@ impl Pattern {
pub fn reverse(self) -> Self {
use Pattern::*;
match self {
- Byte(_) | Nothing | Range(..) => self,
+ Byte(_) | Nothing | Range(..) | CharacterClass(_) => self,
Alt(patterns) => Alt(patterns.into_iter().map(Self::reverse).collect()),
Concat(patterns) => Concat(patterns.into_iter().map(Self::reverse).rev().collect()),
- Rep(pattern, min, max) => Rep(Box::new(pattern.reverse()), min, max),
+ Rep(pattern, min, max, greedy) => Rep(Box::new(pattern.reverse()), min, max, greedy),
Assertion(dir, pol, pat) => Assertion(dir.reverse(), pol, Box::new(pat.reverse())),
}
}
@@ -217,12 +240,23 @@ fn parse_rep(s: &mut super::Cursor<'_>) -> Result<Pattern> {
));
}
- Ok(Pattern::Rep(Box::new(atom), min_rep, max_rep))
+ Ok(Pattern::Rep(
+ Box::new(atom),
+ min_rep,
+ max_rep,
+ GreedyBehavior::Greedy,
+ ))
} else {
Ok(atom)
}
}
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
+pub enum GreedyBehavior {
+ Greedy,
+ NonGreedy,
+}
+
const SYMBOLS: &[u8] = b"{}[]()*+-?|.\\ ";
fn is_symbol(x: u8) -> bool {
SYMBOLS.contains(&x)