From 24d41bb4daf081bb9cd63a2107b28b1878594ed3 Mon Sep 17 00:00:00 2001 From: Jonas Maier Date: Sat, 6 Jun 2026 14:29:21 +0200 Subject: tried to implement a decision tree for faster dfa, but it is not faster --- src/regex/dfa.rs | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'src/regex/dfa.rs') diff --git a/src/regex/dfa.rs b/src/regex/dfa.rs index 78a216c..5687c8b 100644 --- a/src/regex/dfa.rs +++ b/src/regex/dfa.rs @@ -1,7 +1,7 @@ use core::fmt; use std::collections::{BinaryHeap, HashMap, HashSet}; -use crate::regex::{Match, RegexEngine, enfa::EnfaTranslationError}; +use crate::regex::{Match, RegexEngine, decision_tree::DecisionTree, enfa::EnfaTranslationError}; use super::{ Pattern, @@ -13,6 +13,7 @@ pub type StateId = usize; pub struct State { trans: HashMap, + fast_trans: DecisionTree, default_trans: StateId, accept: bool, } @@ -67,6 +68,7 @@ impl From for DFA { let mut states: Vec = (0..len) .map(|_| State { + fast_trans: DecisionTree::default(), trans: HashMap::new(), default_trans: void, accept: false, @@ -82,6 +84,10 @@ impl From for DFA { } } + for s in states.iter_mut() { + s.fast_trans = DecisionTree::build(s.trans.clone(), s.default_trans.clone()); + } + let mut this = Self { start: multi_to_dfa[&nfa.start_multi_state()], states, @@ -126,14 +132,8 @@ impl RegexEngine for DFA { impl DFA { pub fn matches(&self, x: &[u8]) -> bool { let mut state = self.start; - 'next_byte: for &b in x.iter() { - for (range, &next_state) in self.states[state].trans.iter() { - if range.contains(b) { - state = next_state; - continue 'next_byte; - } - } - state = self.states[state].default_trans; + for &b in x.iter() { + state = self.states[state].fast_trans.decide(b); } self.states[state].accept } @@ -360,9 +360,13 @@ impl DFA { for i in 0..self.states.len() { if is_alive(i) { let s = &self.states[i]; + let trans: HashMap = + s.trans.iter().map(|(&ch, &to)| (ch, remap(to))).collect(); + let default_trans = remap(s.default_trans); new_states.push(State { - trans: s.trans.iter().map(|(&ch, &to)| (ch, remap(to))).collect(), - default_trans: remap(s.default_trans), + fast_trans: DecisionTree::build(trans.clone(), default_trans), + trans, + default_trans, accept: s.accept, }); } -- cgit v1.2.3