From d39ed8fc77981f937c35fa84a7ff5d288d0c7181 Mon Sep 17 00:00:00 2001 From: Jonas Maier Date: Sat, 6 Jun 2026 13:45:54 +0200 Subject: clean up --- src/regex/byte_range.rs | 4 ++++ src/regex/dfa.rs | 9 +++++++++ src/regex/mod.rs | 3 ++- src/regex/simple.rs | 24 +++++++++++++++++------- src/run/builtin.rs | 33 +++++++++++++++++++++++---------- 5 files changed, 55 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/regex/byte_range.rs b/src/regex/byte_range.rs index 5123da5..d549a55 100644 --- a/src/regex/byte_range.rs +++ b/src/regex/byte_range.rs @@ -24,6 +24,10 @@ impl ByteRange { Self::new_range(c, c) } + pub fn all() -> Self { + Self::new_range(0, 255) + } + pub fn contains(&self, c: u8) -> bool { self.from <= c && c <= self.to } diff --git a/src/regex/dfa.rs b/src/regex/dfa.rs index c55d99d..78a216c 100644 --- a/src/regex/dfa.rs +++ b/src/regex/dfa.rs @@ -374,6 +374,15 @@ impl DFA { pub fn minify(&mut self) { for state in self.states.iter_mut() { state.trans.retain(|_, to| *to != state.default_trans); + if state.trans.len() == 1 + && state + .trans + .iter() + .all(|t| *t.0 == ByteRange::new_range(0, 255)) + { + state.default_trans = state.trans.iter().map(|x| *x.1).next().unwrap(); + state.trans.clear(); + } } self.hopcroft_minimization(); diff --git a/src/regex/mod.rs b/src/regex/mod.rs index 2c9f3d1..438361a 100644 --- a/src/regex/mod.rs +++ b/src/regex/mod.rs @@ -283,8 +283,9 @@ impl RegexEngine for CompiledPattern { macro_rules! 
all_engines { ($ty_name:ident, $($x:ident : $ty:ty,)*) => { + #[derive(Debug)] pub struct $ty_name { - $($x: Option<$ty>,)* + $(pub $x: Option<$ty>,)* } impl RegexEngine for $ty_name { type CompileError = (); diff --git a/src/regex/simple.rs b/src/regex/simple.rs index e75ff14..4286322 100644 --- a/src/regex/simple.rs +++ b/src/regex/simple.rs @@ -8,6 +8,7 @@ fn empty_match() -> Option { }) } +#[derive(Debug)] pub struct Anything; #[derive(Debug, Clone)] @@ -17,12 +18,19 @@ impl RegexEngine for Anything { type CompileError = NotASimpleWildcard; fn compile(pat: Pattern) -> Result { - if let Pattern::Rep(pat, 0, None, _) = pat - && let Pattern::CharacterClass(Class::Everything) = *pat - { - Ok(Anything) - } else { - Err(NotASimpleWildcard) + match pat { + Pattern::Rep(pat, 0, None, _) => match *pat { + Pattern::CharacterClass(Class::Everything) => Ok(Anything), + _ => Err(NotASimpleWildcard), + }, + Pattern::Concat(pats) | Pattern::Alt(pats) => { + if !pats.is_empty() && pats.into_iter().all(|p| Anything::compile(p).is_ok()) { + Ok(Anything) + } else { + Err(NotASimpleWildcard) + } + } + _ => Err(NotASimpleWildcard), } } @@ -31,6 +39,7 @@ impl RegexEngine for Anything { } } +#[derive(Debug)] pub struct Nothing; #[derive(Debug, Clone)] pub struct NotASimpleNothing; @@ -70,8 +79,9 @@ impl RegexEngine for Nothing { } } +#[derive(Debug)] pub struct Exact { - bytes: Vec<u8>, + pub bytes: Vec<u8>, } const MEM_LIMIT: usize = 25_000; diff --git a/src/run/builtin.rs b/src/run/builtin.rs index 45d33e2..2d19348 100644 --- a/src/run/builtin.rs +++ b/src/run/builtin.rs @@ -1039,7 +1039,7 @@ impl Builtin for pish_theme { #[cfg(debug_assertions)] mod dbg { - use crate::regex::{dfa::DFA, enfa::ENFA}; + use crate::regex::{AllEngines, RegexEngine}; use super::*; @@ -1123,17 +1123,30 @@ mod dbg { } }; - let nfa = match ENFA::try_from(regex) { - Ok(nfa) => nfa, - Err(err) => { - writeln!(stdout, "nfa error: {err:?}")?; - return Err(Error::Exit(2)); - } + let Ok(compiled) = 
AllEngines::compile(regex) else { + writeln!(stdout, "failed to compile.")?; + return Err(Error::Exit(2)); }; - writeln!(stdout, "{nfa:?}")?; - let dfa = DFA::from(nfa); - writeln!(stdout, "{dfa:?}")?; + if let Some(dfa) = compiled.dfa.as_ref() { + writeln!(stdout, "{dfa:?}")?; + } + + if let Some(bc) = compiled.bc.as_ref() { + writeln!(stdout, "{bc:?}")?; + } + + if compiled.any.is_some() { + writeln!(stdout, "any")?; + } + + if compiled.nothing.is_some() { + writeln!(stdout, "nothing")?; + } + + if let Some(exact) = compiled.exact.as_ref() { + writeln!(stdout, "exact({})", exact.bytes.escape_ascii())?; + } Ok(()) } -- cgit v1.2.3