diff options
Diffstat (limited to 'src/parse/regex/dfa.rs')
| -rw-r--r-- | src/parse/regex/dfa.rs | 110 |
1 files changed, 110 insertions, 0 deletions
diff --git a/src/parse/regex/dfa.rs b/src/parse/regex/dfa.rs new file mode 100644 index 0000000..aba6238 --- /dev/null +++ b/src/parse/regex/dfa.rs @@ -0,0 +1,110 @@ +use core::fmt; +use std::collections::HashMap; + +use super::{ + byte_range::ByteRange, + enfa::{ENFA, MultiState}, +}; + +pub type StateId = usize; + +pub struct State { + trans: HashMap<ByteRange, StateId>, + default_trans: StateId, + accept: bool, +} + +pub struct DFA { + start: StateId, + states: Vec<State>, +} + +impl fmt::Debug for DFA { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "DFA {{")?; + for (i, s) in self.states.iter().enumerate() { + if self.start == i { + write!(f, "-> {i}: ")?; + } else { + write!(f, " {i}: ")?; + } + + for (chr, to) in s.trans.iter() { + write!(f, "{chr:?} to {to}, ")?; + } + + write!(f, "default to {}", s.default_trans)?; + if s.accept { + write!(f, ", accept")?; + } + writeln!(f)?; + } + writeln!(f, "}}") + } +} + +impl DFA { + pub fn matches(&self, x: &[u8]) -> bool { + let mut state = self.start; + 'next_byte: for &b in x.iter() { + for (range, &next_state) in self.states[state].trans.iter() { + if range.contains(b) { + state = next_state; + continue 'next_byte; + } + } + state = self.states[state].default_trans; + } + self.states[state].accept + } +} + +impl From<ENFA> for DFA { + fn from(mut nfa: ENFA) -> Self { + nfa.simplify(); + + for s in nfa.states.iter() { + if !s.epsilon_trans.is_empty() { + panic!( + "NFA simplification did not remove epsilon transitions - cannot proceed with powerset construction." + ); + } + } + + let mut multi_states = nfa.all_multi_states(); + multi_states.insert(nfa.void_multi_state()); + let mut len = 0; + let multi_to_dfa: HashMap<MultiState, StateId> = multi_states + .clone() + .into_iter() + .map(|ms| { + len += 1; + (ms, len - 1) + }) + .collect(); + + let void = multi_to_dfa[&nfa.void_multi_state()]; + + let mut states: Vec<State> = (0..len) + .map(|_| State { + trans: HashMap::new(), + default_trans: void, + accept: false, + }) + .collect(); + + for ms in multi_states.iter() { + let i: usize = multi_to_dfa[&ms]; + states[i].accept = ms.accept(); + for t in ms.possible_transitions() { + let k = multi_to_dfa[&ms.transition(t)]; + states[i].trans.insert(t, k); + } + } + + Self { + start: multi_to_dfa[&nfa.start_multi_state()], + states, + } + } +} |
