aboutsummaryrefslogtreecommitdiffstats
path: root/src/regex/simple.rs
diff options
context:
space:
mode:
author Jonas Maier <jonas@x77.dev> 2026-06-06 12:15:52 +0200
committer Jonas Maier <jonas@x77.dev> 2026-06-06 12:15:52 +0200
commit 53980774c327675e886179c0a2c140744dcf9b95 (patch)
tree ca1fdcc9938fce2c10c51e0a51659c6ba38ac5ba /src/regex/simple.rs
parent 75e0c29cf91ddc6299c14a94a038c3e3df3d2805 (diff)
download pish-53980774c327675e886179c0a2c140744dcf9b95.tar.gz
special cased regex for performance
Diffstat (limited to 'src/regex/simple.rs')
-rw-r--r-- src/regex/simple.rs 125
1 files changed, 125 insertions, 0 deletions
diff --git a/src/regex/simple.rs b/src/regex/simple.rs
new file mode 100644
index 0000000..00bc9b4
--- /dev/null
+++ b/src/regex/simple.rs
@@ -0,0 +1,125 @@
+use crate::regex::CharacterClass;
+
+use super::{Match, Pattern, RegexEngine};
+
+/// Builds the success value shared by the engines in this file: a [`Match`]
+/// whose `submatches` collection is empty.
+fn empty_match() -> Option<Match> {
+    let no_groups = Match {
+        submatches: [].into(),
+    };
+    Some(no_groups)
+}
+
+/// Engine for the plain wildcard: an unbounded repetition (minimum 0, no
+/// maximum) of [`CharacterClass::Everything`]. Running it never inspects the
+/// input.
+pub struct Anything;
+
+/// Compile error of [`Anything`]: the pattern does not have the wildcard shape.
+#[derive(Debug, Clone)]
+pub struct NotASimpleWildcard;
+
+impl RegexEngine for Anything {
+    type CompileError = NotASimpleWildcard;
+
+    /// Accepts only `Pattern::Rep(inner, 0, None, _)` whose `inner` is exactly
+    /// `Pattern::CharacterClass(CharacterClass::Everything)`; every other
+    /// pattern yields [`NotASimpleWildcard`].
+    fn compile(pat: Pattern) -> Result<Self, Self::CompileError> {
+        // Outer shape first: a repetition with minimum 0 and no maximum.
+        let Pattern::Rep(inner, 0, None, _) = pat else {
+            return Err(NotASimpleWildcard);
+        };
+        // Then the repeated element itself must be the "everything" class.
+        match *inner {
+            Pattern::CharacterClass(CharacterClass::Everything) => Ok(Anything),
+            _ => Err(NotASimpleWildcard),
+        }
+    }
+
+    /// Reports a match without submatches for any input.
+    fn run(&self, _input: &[u8]) -> Option<Match> {
+        empty_match()
+    }
+}
+
+/// Engine for patterns that can be proven, from their structure alone, to
+/// never match. Running it always returns `None`.
+pub struct Nothing;
+
+/// Compile error of [`Nothing`]: the pattern could not be proven unmatchable.
+#[derive(Debug, Clone)]
+pub struct NotASimpleNothing;
+
+impl RegexEngine for Nothing {
+    type CompileError = NotASimpleNothing;
+
+    /// Succeeds only when `pat` is structurally impossible to match:
+    ///
+    /// * a `Range(a, b)` with `a > b`,
+    /// * `CharacterClass::Nothing`,
+    /// * an `Alt` whose alternatives are all impossible (an `Alt` with no
+    ///   alternatives also qualifies),
+    /// * a `Concat` containing at least one impossible element,
+    /// * a `Rep` whose maximum is below its minimum,
+    /// * a `Rep` with a minimum above zero around an impossible pattern,
+    /// * a `Submatch` around an impossible pattern.
+    ///
+    /// Everything else is rejected with [`NotASimpleNothing`]. The analysis is
+    /// conservative: a rejection does not imply that the pattern can match.
+    fn compile(pat: Pattern) -> Result<Self, Self::CompileError> {
+        match pat {
+            Pattern::Range(a, b) if a > b => Ok(Nothing),
+            Pattern::CharacterClass(CharacterClass::Nothing) => Ok(Nothing),
+            Pattern::Alt(pats) => {
+                // Every alternative has to be impossible.
+                if pats.into_iter().all(|p| Self::compile(p).is_ok()) {
+                    Ok(Nothing)
+                } else {
+                    Err(NotASimpleNothing)
+                }
+            }
+            Pattern::Concat(pats) => {
+                // One impossible element anywhere in the sequence is enough to
+                // make the whole concatenation impossible.
+                if pats.into_iter().any(|p| Self::compile(p).is_ok()) {
+                    Ok(Nothing)
+                } else {
+                    Err(NotASimpleNothing)
+                }
+            }
+            Pattern::Rep(_, min, Some(max), _) if max < min => Ok(Nothing),
+            // Only a repetition that demands at least one iteration inherits
+            // the impossibility of its body. With a minimum of 0 (bounded or
+            // unbounded) zero iterations are allowed, so such a pattern falls
+            // through to the rejection below.
+            Pattern::Rep(inner, min, _, _) if min > 0 => Self::compile(*inner),
+            Pattern::Submatch(inner) => Self::compile(*inner),
+            _ => Err(NotASimpleNothing),
+        }
+    }
+
+    /// Never matches; the input is ignored.
+    fn run(&self, _input: &[u8]) -> Option<Match> {
+        None
+    }
+}
+
+/// Engine for patterns that reduce to one fixed byte string. An input matches
+/// only when it is equal to that string as a whole.
+pub struct Exact {
+    /// The byte string an input has to equal.
+    bytes: Vec<u8>,
+}
+
+/// Exclusive upper bound, in bytes, on the expansion of a fixed-count
+/// repetition performed by [`ce`].
+const MEM_LIMIT: usize = 25_000;
+
+/// Compile error of [`Exact`]: the pattern does not reduce to a fixed byte string.
+#[derive(Debug, Clone)]
+pub struct NotSimplyAString;
+
+/// Tries to reduce `pat` to the single byte string it stands for.
+///
+/// Handled shapes: a single `Byte`, a `Concat` of reducible patterns, a `Rep`
+/// whose minimum equals its maximum around a reducible pattern (as long as the
+/// expansion stays below [`MEM_LIMIT`] bytes), and `Pattern::Nothing`, which
+/// contributes no bytes. Returns `None` for anything else.
+fn ce(pat: Pattern) -> Option<Vec<u8>> {
+    match pat {
+        Pattern::Byte(b) => Some(vec![b]),
+        Pattern::Concat(patterns) => {
+            // Append each reduced element in order; the first element that
+            // cannot be reduced aborts the whole reduction.
+            let mut out = Vec::new();
+            for p in patterns {
+                out.extend(ce(p)?);
+            }
+            Some(out)
+        }
+        Pattern::Rep(pat, min, Some(max), _) if min == max => {
+            // Checked conversion instead of an `as` cast, so a count that does
+            // not fit into `usize` is rejected rather than silently truncated.
+            let count = usize::try_from(min).ok()?;
+            let bytes = ce(*pat)?;
+            // Checked multiplication: an overflowing product must not wrap
+            // around to a small value and slip past the limit.
+            let total = bytes.len().checked_mul(count)?;
+            (total < MEM_LIMIT).then(|| bytes.repeat(count))
+        }
+        Pattern::Submatch(_) => None, // TODO: submatches could be stored as constant offsets
+        Pattern::Nothing => Some(Vec::new()),
+        _ => None,
+    }
+}
+
+impl RegexEngine for Exact {
+    type CompileError = NotSimplyAString;
+
+    /// Succeeds when [`ce`] can reduce `pat` to a fixed byte string; otherwise
+    /// returns [`NotSimplyAString`].
+    fn compile(pat: Pattern) -> Result<Self, Self::CompileError> {
+        ce(pat)
+            .map(|bytes| Self { bytes })
+            .ok_or(NotSimplyAString)
+    }
+
+    /// Returns a match without submatches when `input` equals the stored bytes
+    /// exactly, and `None` otherwise.
+    fn run(&self, input: &[u8]) -> Option<Match> {
+        if input == self.bytes {
+            empty_match()
+        } else {
+            None
+        }
+    }
+}