diff options
Diffstat (limited to 'src/regex/dfa.rs')
| -rw-r--r-- | src/regex/dfa.rs | 156 |
1 files changed, 112 insertions, 44 deletions
diff --git a/src/regex/dfa.rs b/src/regex/dfa.rs index 5687c8b..0e47b89 100644 --- a/src/regex/dfa.rs +++ b/src/regex/dfa.rs @@ -11,20 +11,64 @@ use super::{ pub type StateId = usize; -pub struct State { +trait DfaDecider: From<HashMapDecider> { + fn decide(&self, byte: u8) -> StateId; +} + +pub struct HashMapDecider { trans: HashMap<ByteRange, StateId>, - fast_trans: DecisionTree<StateId>, default_trans: StateId, +} + +impl DfaDecider for HashMapDecider { + fn decide(&self, byte: u8) -> StateId { + for (range, to) in self.trans.iter() { + if range.contains(byte) { + return *to; + } + } + self.default_trans + } +} + +impl From<HashMapDecider> for DecisionTree<StateId> { + fn from(value: HashMapDecider) -> Self { + DecisionTree::build(value.trans, value.default_trans) + } +} +impl DfaDecider for DecisionTree<StateId> { + fn decide(&self, byte: u8) -> StateId { + DecisionTree::decide(&self, byte) + } +} + +impl From<HashMapDecider> for [u32; 256] { + fn from(value: HashMapDecider) -> Self { + let mut table = [0; 256]; + for i in 0..256 { + table[i] = value.decide(i as u8) as u32; + } + table + } +} +impl DfaDecider for [u32; 256] { + fn decide(&self, byte: u8) -> StateId { + self[byte as usize] as StateId + } +} + +pub struct State<D> { + decision: D, accept: bool, } #[allow(clippy::upper_case_acronyms)] -pub struct DFA { +pub struct DFA<D = HashMapDecider> { start: StateId, - states: Vec<State>, + states: Vec<State<D>>, } -impl fmt::Debug for DFA { +impl fmt::Debug for DFA<HashMapDecider> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { writeln!(f, "DFA {{")?; for (i, s) in self.states.iter().enumerate() { @@ -34,11 +78,11 @@ impl fmt::Debug for DFA { write!(f, " {i}: ")?; } - for (chr, to) in s.trans.iter() { + for (chr, to) in s.decision.trans.iter() { write!(f, "{chr:?} to {to}, ")?; } - write!(f, "dfl to {}", s.default_trans)?; + write!(f, "dfl to {}", s.decision.default_trans)?; if s.accept { write!(f, ", accept")?; } @@ -66,11 +110,12 @@ impl From<ENFA> for DFA { let void = multi_to_dfa[&nfa.void_multi_state()]; - let mut states: Vec<State> = (0..len) + let mut states: Vec<State<_>> = (0..len) .map(|_| State { - fast_trans: DecisionTree::default(), - trans: HashMap::new(), - default_trans: void, + decision: HashMapDecider { + trans: HashMap::new(), + default_trans: void, + }, accept: false, }) .collect(); @@ -80,14 +125,10 @@ impl From<ENFA> for DFA { states[i].accept = ms.accept(); for t in ms.possible_transitions() { let k = multi_to_dfa[&ms.transition(t)]; - states[i].trans.insert(t, k); + states[i].decision.trans.insert(t, k); } } - for s in states.iter_mut() { - s.fast_trans = DecisionTree::build(s.trans.clone(), s.default_trans.clone()); - } - let mut this = Self { start: multi_to_dfa[&nfa.start_multi_state()], states, @@ -104,7 +145,7 @@ pub enum DFACompileError { NFAError(EnfaTranslationError), } -impl RegexEngine for DFA { +impl<D: DfaDecider> RegexEngine for DFA<D> { type CompileError = DFACompileError; fn compile(pat: Pattern) -> Result<Self, Self::CompileError> { @@ -113,7 +154,18 @@ impl RegexEngine for DFA { if nfa.has_submatches { Err(DFACompileError::SubmatchesNotSupported) } else { - Ok(Self::from(nfa)) + let this = DFA::from(nfa); + Ok(Self { + states: this + .states + .into_iter() + .map(|s| State { + decision: D::from(s.decision), + accept: s.accept, + }) + .collect(), + start: this.start, + }) } } Err(e) => Err(DFACompileError::NFAError(e)), @@ -121,7 +173,24 @@ impl RegexEngine for DFA { } fn run(&self, input: &[u8]) -> Option<super::Match> { - if self.matches(input) { + let mut state = self.start; + let mut i = 0; + while i + 7 < input.len() { + state = self.states[state].decision.decide(input[i+0]); + state = self.states[state].decision.decide(input[i+1]); + state = self.states[state].decision.decide(input[i+2]); + state = self.states[state].decision.decide(input[i+3]); + state = self.states[state].decision.decide(input[i+4]); + state = self.states[state].decision.decide(input[i+5]); + state = self.states[state].decision.decide(input[i+6]); + state = self.states[state].decision.decide(input[i+7]); + i += 8; + } + while i < input.len() { + state = self.states[state].decision.decide(input[i]); + i += 1; + } + if self.states[state].accept { Some(Match::new_empty()) } else { None @@ -129,16 +198,6 @@ impl RegexEngine for DFA { } } -impl DFA { - pub fn matches(&self, x: &[u8]) -> bool { - let mut state = self.start; - for &b in x.iter() { - state = self.states[state].fast_trans.decide(b); - } - self.states[state].accept - } -} - mod state_set { #[derive(Hash, Clone, PartialEq, Eq)] pub struct StateSet { @@ -243,21 +302,21 @@ use state_set::StateSet; trait GoesTo { fn goes_to(&self, to: usize) -> bool; } -impl GoesTo for (&State, ByteRange) { +impl GoesTo for (&State<HashMapDecider>, ByteRange) { fn goes_to(&self, target: usize) -> bool { let from = self.0; let ch = self.1; - for (c, &to) in from.trans.iter() { + for (c, &to) in from.decision.trans.iter() { if c.overlaps(ch) && to == target { return true; } } - from.default_trans == target + from.decision.default_trans == target } } impl DFA { - fn states_where(&self, mut f: impl FnMut(&State) -> bool) -> StateSet { + fn states_where(&self, mut f: impl FnMut(&State<HashMapDecider>) -> bool) -> StateSet { let states = self .states .iter() @@ -276,7 +335,7 @@ impl DFA { let mut ranges: Vec<ByteRange> = self .states .iter() - .flat_map(|s| s.trans.iter().map(|t| *t.0)) + .flat_map(|s| s.decision.trans.iter().map(|t| *t.0)) .chain([ByteRange::new_range(u8::MIN, u8::MAX)]) .collect(); ranges.sort(); @@ -360,13 +419,18 @@ impl DFA { for i in 0..self.states.len() { if is_alive(i) { let s = &self.states[i]; - let trans: HashMap<ByteRange, usize> = - s.trans.iter().map(|(&ch, &to)| (ch, remap(to))).collect(); - let default_trans = remap(s.default_trans); + let trans: HashMap<ByteRange, usize> = s + .decision + .trans + .iter() + .map(|(&ch, &to)| (ch, remap(to))) + .collect(); + let default_trans = remap(s.decision.default_trans); new_states.push(State { - fast_trans: DecisionTree::build(trans.clone(), default_trans), - trans, - default_trans, + decision: HashMapDecider { + trans, + default_trans, + }, accept: s.accept, }); } @@ -377,15 +441,19 @@ impl DFA { pub fn minify(&mut self) { for state in self.states.iter_mut() { - state.trans.retain(|_, to| *to != state.default_trans); - if state.trans.len() == 1 + state + .decision + .trans + .retain(|_, to| *to != state.decision.default_trans); + if state.decision.trans.len() == 1 && state + .decision .trans .iter() .all(|t| *t.0 == ByteRange::new_range(0, 255)) { - state.default_trans = state.trans.iter().map(|x| *x.1).next().unwrap(); - state.trans.clear(); + state.decision.default_trans = state.decision.trans.iter().map(|x| *x.1).next().unwrap(); + state.decision.trans.clear(); } } |
