diff options
Diffstat (limited to 'src/parse/regex/mod.rs')
| -rw-r--r-- | src/parse/regex/mod.rs | 52 |
1 files changed, 43 insertions, 9 deletions
diff --git a/src/parse/regex/mod.rs b/src/parse/regex/mod.rs index 1bcf18c..79e234f 100644 --- a/src/parse/regex/mod.rs +++ b/src/parse/regex/mod.rs @@ -2,12 +2,12 @@ use crate::parse::{NotImplementedKind, OtherHighlights}; use super::{Parse, ParseError, Result}; +pub mod bc; mod byte_range; pub mod dfa; pub mod enfa; -pub mod bc; -#[derive(PartialEq, Eq, Debug, Clone, Copy)] +#[derive(PartialEq, Eq, Debug, Clone, Copy, Hash)] pub enum LookDirection { Ahead, Behind, @@ -28,13 +28,35 @@ pub enum LookPolarity { Negative, } -#[derive(PartialEq, Debug, Clone)] +#[derive(PartialEq, Eq, Debug, Clone, Copy, Hash)] +pub enum CharacterClass { + Everything, + Nothing, + Whitespace, + Alphabetic, + Alphanumeric, +} + +impl CharacterClass { + pub fn matches(self, byte: u8) -> bool { + match self { + CharacterClass::Everything => true, + CharacterClass::Nothing => false, + CharacterClass::Whitespace => byte.is_ascii_whitespace(), + CharacterClass::Alphabetic => byte.is_ascii_alphabetic(), + CharacterClass::Alphanumeric => byte.is_ascii_alphanumeric(), + } + } +} + +#[derive(PartialEq, Eq, Hash, Debug, Clone)] pub enum Pattern { Byte(u8), Range(u8, u8), + CharacterClass(CharacterClass), Alt(Vec<Pattern>), Concat(Vec<Pattern>), - Rep(Box<Pattern>, u32, Option<u32>), + Rep(Box<Pattern>, u32, Option<u32>, GreedyBehavior), Assertion(LookDirection, LookPolarity, Box<Pattern>), Nothing, } @@ -106,16 +128,17 @@ impl Pattern { match self { Pattern::Byte(_) => ByteConsumption::one(), Pattern::Range(_, _) => ByteConsumption::one(), + Pattern::CharacterClass(_) => ByteConsumption::one(), Pattern::Alt(patterns) => patterns .iter() .map(Self::max_byte_consumption) .max() .unwrap_or(ByteConsumption::zero()), Pattern::Concat(patterns) => patterns.iter().map(Self::max_byte_consumption).sum(), - Pattern::Rep(pattern, _, Some(max_reps)) => { + Pattern::Rep(pattern, _, Some(max_reps), _) => { pattern.max_byte_consumption() * (*max_reps as usize) } - Pattern::Rep(_, _, None) => ByteConsumption::Unbounded, + Pattern::Rep(_, _, None, _) => ByteConsumption::Unbounded, Pattern::Assertion(_, _, _) => ByteConsumption::zero(), Pattern::Nothing => ByteConsumption::zero(), } @@ -124,10 +147,10 @@ impl Pattern { pub fn reverse(self) -> Self { use Pattern::*; match self { - Byte(_) | Nothing | Range(..) => self, + Byte(_) | Nothing | Range(..) | CharacterClass(_) => self, Alt(patterns) => Alt(patterns.into_iter().map(Self::reverse).collect()), Concat(patterns) => Concat(patterns.into_iter().map(Self::reverse).rev().collect()), - Rep(pattern, min, max) => Rep(Box::new(pattern.reverse()), min, max), + Rep(pattern, min, max, greedy) => Rep(Box::new(pattern.reverse()), min, max, greedy), Assertion(dir, pol, pat) => Assertion(dir.reverse(), pol, Box::new(pat.reverse())), } } @@ -217,12 +240,23 @@ fn parse_rep(s: &mut super::Cursor<'_>) -> Result<Pattern> { )); } - Ok(Pattern::Rep(Box::new(atom), min_rep, max_rep)) + Ok(Pattern::Rep( + Box::new(atom), + min_rep, + max_rep, + GreedyBehavior::Greedy, + )) } else { Ok(atom) } } +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum GreedyBehavior { + Greedy, + NonGreedy, +} + const SYMBOLS: &[u8] = b"{}[]()*+-?|.\\ "; fn is_symbol(x: u8) -> bool { SYMBOLS.contains(&x) |
