aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/regex/byte_range.rs4
-rw-r--r--src/regex/dfa.rs9
-rw-r--r--src/regex/mod.rs3
-rw-r--r--src/regex/simple.rs24
-rw-r--r--src/run/builtin.rs33
5 files changed, 55 insertions, 18 deletions
diff --git a/src/regex/byte_range.rs b/src/regex/byte_range.rs
index 5123da5..d549a55 100644
--- a/src/regex/byte_range.rs
+++ b/src/regex/byte_range.rs
@@ -24,6 +24,10 @@ impl ByteRange {
Self::new_range(c, c)
}
+ pub fn all() -> Self {
+ Self::new_range(0, 255)
+ }
+
pub fn contains(&self, c: u8) -> bool {
self.from <= c && c <= self.to
}
diff --git a/src/regex/dfa.rs b/src/regex/dfa.rs
index c55d99d..78a216c 100644
--- a/src/regex/dfa.rs
+++ b/src/regex/dfa.rs
@@ -374,6 +374,15 @@ impl DFA {
pub fn minify(&mut self) {
for state in self.states.iter_mut() {
state.trans.retain(|_, to| *to != state.default_trans);
+ if state.trans.len() == 1
+ && state
+ .trans
+ .iter()
+ .all(|t| *t.0 == ByteRange::new_range(0, 255))
+ {
+ state.default_trans = state.trans.iter().map(|x| *x.1).next().unwrap();
+ state.trans.clear();
+ }
}
self.hopcroft_minimization();
diff --git a/src/regex/mod.rs b/src/regex/mod.rs
index 2c9f3d1..438361a 100644
--- a/src/regex/mod.rs
+++ b/src/regex/mod.rs
@@ -283,8 +283,9 @@ impl RegexEngine for CompiledPattern {
macro_rules! all_engines {
($ty_name:ident, $($x:ident : $ty:ty,)*) => {
+ #[derive(Debug)]
pub struct $ty_name {
- $($x: Option<$ty>,)*
+ $(pub $x: Option<$ty>,)*
}
impl RegexEngine for $ty_name {
type CompileError = ();
diff --git a/src/regex/simple.rs b/src/regex/simple.rs
index e75ff14..4286322 100644
--- a/src/regex/simple.rs
+++ b/src/regex/simple.rs
@@ -8,6 +8,7 @@ fn empty_match() -> Option<Match> {
})
}
+#[derive(Debug)]
pub struct Anything;
#[derive(Debug, Clone)]
@@ -17,12 +18,19 @@ impl RegexEngine for Anything {
type CompileError = NotASimpleWildcard;
fn compile(pat: Pattern) -> Result<Self, Self::CompileError> {
- if let Pattern::Rep(pat, 0, None, _) = pat
- && let Pattern::CharacterClass(Class::Everything) = *pat
- {
- Ok(Anything)
- } else {
- Err(NotASimpleWildcard)
+ match pat {
+ Pattern::Rep(pat, 0, None, _) => match *pat {
+ Pattern::CharacterClass(Class::Everything) => Ok(Anything),
+ _ => Err(NotASimpleWildcard),
+ },
+ Pattern::Concat(pats) | Pattern::Alt(pats) => {
+ if !pats.is_empty() && pats.into_iter().all(|p| Anything::compile(p).is_ok()) {
+ Ok(Anything)
+ } else {
+ Err(NotASimpleWildcard)
+ }
+ }
+ _ => Err(NotASimpleWildcard),
}
}
@@ -31,6 +39,7 @@ impl RegexEngine for Anything {
}
}
+#[derive(Debug)]
pub struct Nothing;
#[derive(Debug, Clone)]
pub struct NotASimpleNothing;
@@ -70,8 +79,9 @@ impl RegexEngine for Nothing {
}
}
+#[derive(Debug)]
pub struct Exact {
- bytes: Vec<u8>,
+ pub bytes: Vec<u8>,
}
const MEM_LIMIT: usize = 25_000;
diff --git a/src/run/builtin.rs b/src/run/builtin.rs
index 45d33e2..2d19348 100644
--- a/src/run/builtin.rs
+++ b/src/run/builtin.rs
@@ -1039,7 +1039,7 @@ impl Builtin for pish_theme {
#[cfg(debug_assertions)]
mod dbg {
- use crate::regex::{dfa::DFA, enfa::ENFA};
+ use crate::regex::{AllEngines, RegexEngine};
use super::*;
@@ -1123,17 +1123,30 @@ mod dbg {
}
};
- let nfa = match ENFA::try_from(regex) {
- Ok(nfa) => nfa,
- Err(err) => {
- writeln!(stdout, "nfa error: {err:?}")?;
- return Err(Error::Exit(2));
- }
+ let Ok(compiled) = AllEngines::compile(regex) else {
+ writeln!(stdout, "failed to compile.")?;
+ return Err(Error::Exit(2));
};
- writeln!(stdout, "{nfa:?}")?;
- let dfa = DFA::from(nfa);
- writeln!(stdout, "{dfa:?}")?;
+ if let Some(dfa) = compiled.dfa.as_ref() {
+ writeln!(stdout, "{dfa:?}")?;
+ }
+
+ if let Some(bc) = compiled.bc.as_ref() {
+ writeln!(stdout, "{bc:?}")?;
+ }
+
+ if compiled.any.is_some() {
+ writeln!(stdout, "any")?;
+ }
+
+ if compiled.nothing.is_some() {
+ writeln!(stdout, "nothing")?;
+ }
+
+ if let Some(exact) = compiled.exact.as_ref() {
+ writeln!(stdout, "exact({})", exact.bytes.escape_ascii())?;
+ }
Ok(())
}