aboutsummaryrefslogtreecommitdiffstats
path: root/src/parse/regex/dfa.rs
diff options
context:
space:
mode:
authorJonas Maier <jonas@x77.dev>2026-05-31 19:21:44 +0200
committerJonas Maier <jonas@x77.dev>2026-05-31 19:21:44 +0200
commit81759dd51eb1f6f9a7dc8af9b2b8126ff7dfab24 (patch)
treeaf451a170b30e05148088d477b725023d02c505d /src/parse/regex/dfa.rs
parent08f3af622cc3e7b3f85a60c6ffe83d9d70e9dc02 (diff)
downloadpish-81759dd51eb1f6f9a7dc8af9b2b8126ff7dfab24.tar.gz
regex based case statements
Diffstat (limited to 'src/parse/regex/dfa.rs')
-rw-r--r--src/parse/regex/dfa.rs110
1 files changed, 110 insertions, 0 deletions
diff --git a/src/parse/regex/dfa.rs b/src/parse/regex/dfa.rs
new file mode 100644
index 0000000..aba6238
--- /dev/null
+++ b/src/parse/regex/dfa.rs
@@ -0,0 +1,110 @@
+use core::fmt;
+use std::collections::HashMap;
+
+use super::{
+ byte_range::ByteRange,
+ enfa::{ENFA, MultiState},
+};
+
+pub type StateId = usize;
+
+pub struct State {
+ trans: HashMap<ByteRange, StateId>,
+ default_trans: StateId,
+ accept: bool,
+}
+
+pub struct DFA {
+ start: StateId,
+ states: Vec<State>,
+}
+
+impl fmt::Debug for DFA {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ writeln!(f, "DFA {{")?;
+ for (i, s) in self.states.iter().enumerate() {
+ if self.start == i {
+ write!(f, "-> {i}: ")?;
+ } else {
+ write!(f, " {i}: ")?;
+ }
+
+ for (chr, to) in s.trans.iter() {
+ write!(f, "{chr:?} to {to}, ")?;
+ }
+
+ write!(f, "default to {}", s.default_trans)?;
+ if s.accept {
+ write!(f, ", accept")?;
+ }
+ writeln!(f)?;
+ }
+ writeln!(f, "}}")
+ }
+}
+
+impl DFA {
+ pub fn matches(&self, x: &[u8]) -> bool {
+ let mut state = self.start;
+ 'next_byte: for &b in x.iter() {
+ for (range, &next_state) in self.states[state].trans.iter() {
+ if range.contains(b) {
+ state = next_state;
+ continue 'next_byte;
+ }
+ }
+ state = self.states[state].default_trans;
+ }
+ self.states[state].accept
+ }
+}
+
+impl From<ENFA> for DFA {
+ fn from(mut nfa: ENFA) -> Self {
+ nfa.simplify();
+
+ for s in nfa.states.iter() {
+ if !s.epsilon_trans.is_empty() {
+ panic!(
+ "NFA simplification did not remove epsilon transitions - cannot proceed with powerset construction."
+ );
+ }
+ }
+
+ let mut multi_states = nfa.all_multi_states();
+ multi_states.insert(nfa.void_multi_state());
+ let mut len = 0;
+ let multi_to_dfa: HashMap<MultiState, StateId> = multi_states
+ .clone()
+ .into_iter()
+ .map(|ms| {
+ len += 1;
+ (ms, len - 1)
+ })
+ .collect();
+
+ let void = multi_to_dfa[&nfa.void_multi_state()];
+
+ let mut states: Vec<State> = (0..len)
+ .map(|_| State {
+ trans: HashMap::new(),
+ default_trans: void,
+ accept: false,
+ })
+ .collect();
+
+ for ms in multi_states.iter() {
+ let i: usize = multi_to_dfa[&ms];
+ states[i].accept = ms.accept();
+ for t in ms.possible_transitions() {
+ let k = multi_to_dfa[&ms.transition(t)];
+ states[i].trans.insert(t, k);
+ }
+ }
+
+ Self {
+ start: multi_to_dfa[&nfa.start_multi_state()],
+ states,
+ }
+ }
+}