aboutsummaryrefslogtreecommitdiffstats
path: root/src/parse/regex/dfa.rs
blob: 2fd193533117a67e7ef2923454e1bb35c584f7d4 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
use core::fmt;
use std::collections::HashMap;

use super::{
    byte_range::ByteRange,
    enfa::{ENFA, MultiState},
};

pub type StateId = usize;

pub struct State {
    trans: HashMap<ByteRange, StateId>,
    default_trans: StateId,
    accept: bool,
}

#[allow(clippy::upper_case_acronyms)]
pub struct DFA {
    start: StateId,
    states: Vec<State>,
}

impl fmt::Debug for DFA {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        writeln!(f, "DFA {{")?;
        for (i, s) in self.states.iter().enumerate() {
            if self.start == i {
                write!(f, "-> {i}: ")?;
            } else {
                write!(f, "   {i}: ")?;
            }

            for (chr, to) in s.trans.iter() {
                write!(f, "{chr:?} to {to}, ")?;
            }

            write!(f, "default to {}", s.default_trans)?;
            if s.accept {
                write!(f, ", accept")?;
            }
            writeln!(f)?;
        }
        writeln!(f, "}}")
    }
}

impl DFA {
    pub fn matches(&self, x: &[u8]) -> bool {
        let mut state = self.start;
        'next_byte: for &b in x.iter() {
            for (range, &next_state) in self.states[state].trans.iter() {
                if range.contains(b) {
                    state = next_state;
                    continue 'next_byte;
                }
            }
            state = self.states[state].default_trans;
        }
        self.states[state].accept
    }
}

impl From<ENFA> for DFA {
    fn from(mut nfa: ENFA) -> Self {
        nfa.simplify();

        for s in nfa.states.iter() {
            if s.trans.iter().any(|t| t.is_epsilon()) {
                panic!(
                    "NFA simplification did not remove epsilon transitions - cannot proceed with powerset construction."
                );
            }
        }

        let mut multi_states = nfa.all_multi_states();
        multi_states.insert(nfa.void_multi_state());
        let mut len = 0;
        let multi_to_dfa: HashMap<MultiState, StateId> = multi_states
            .clone()
            .into_iter()
            .map(|ms| {
                len += 1;
                (ms, len - 1)
            })
            .collect();

        let void = multi_to_dfa[&nfa.void_multi_state()];

        let mut states: Vec<State> = (0..len)
            .map(|_| State {
                trans: HashMap::new(),
                default_trans: void,
                accept: false,
            })
            .collect();

        for ms in multi_states.iter() {
            let i: usize = multi_to_dfa[ms];
            states[i].accept = ms.accept();
            for t in ms.possible_transitions() {
                let k = multi_to_dfa[&ms.transition(t)];
                states[i].trans.insert(t, k);
            }
        }

        Self {
            start: multi_to_dfa[&nfa.start_multi_state()],
            states,
        }
    }
}