aboutsummaryrefslogtreecommitdiffstats
path: root/src/parse
diff options
context:
space:
mode:
Diffstat (limited to 'src/parse')
-rw-r--r--src/parse/regex/dfa.rs2
-rw-r--r--src/parse/regex/enfa.rs143
2 files changed, 94 insertions, 51 deletions
diff --git a/src/parse/regex/dfa.rs b/src/parse/regex/dfa.rs
index 7e5be7b..2fd1935 100644
--- a/src/parse/regex/dfa.rs
+++ b/src/parse/regex/dfa.rs
@@ -65,7 +65,7 @@ impl From<ENFA> for DFA {
nfa.simplify();
for s in nfa.states.iter() {
- if !s.epsilon_trans.is_empty() {
+ if s.trans.iter().any(|t| t.is_epsilon()) {
panic!(
"NFA simplification did not remove epsilon transitions - cannot proceed with powerset construction."
);
diff --git a/src/parse/regex/enfa.rs b/src/parse/regex/enfa.rs
index b3d3c4a..6b7b7bb 100644
--- a/src/parse/regex/enfa.rs
+++ b/src/parse/regex/enfa.rs
@@ -55,7 +55,7 @@ impl<'a> MultiState<'a> {
let mut vec: Vec<_> = self
.states
.iter()
- .flat_map(|&i| self.nfa.states[i].trans.iter().map(|x| x.0))
+ .flat_map(|&i| self.nfa.states[i].trans.iter().map(|x| x.consumes.unwrap()))
.collect();
vec = ByteRange::non_overlapping(vec);
vec.sort();
@@ -69,10 +69,13 @@ impl<'a> MultiState<'a> {
.states
.iter()
.flat_map(|&s| {
- self.nfa.states[s]
- .trans
- .iter()
- .filter_map(|&(c, k)| if c.overlaps(ch) { Some(k) } else { None })
+ self.nfa.states[s].trans.iter().filter_map(|t| {
+ if t.consumes.unwrap().overlaps(ch) {
+ Some(t.to)
+ } else {
+ None
+ }
+ })
})
.collect();
@@ -108,14 +111,8 @@ impl ENFA {
let mut s = self.states;
for state in s.iter_mut() {
- let trans = state.trans.iter().map(|(c, id)| (*c, id + amt)).collect();
- let epsilon_trans = state.epsilon_trans.iter().map(|e| e + amt).collect();
-
- *state = EState {
- trans,
- epsilon_trans,
- accept: false,
- }
+ state.remap(|i| i + amt);
+ state.accept = false;
}
s
@@ -126,8 +123,10 @@ impl ENFA {
return;
}
visited[i] = true;
- for &k in self.states[i].epsilon_trans.iter() {
- self.epsilon_dfs(k, visited);
+ for t in self.states[i].trans.iter() {
+ if t.is_epsilon() {
+ self.epsilon_dfs(t.to, visited);
+ }
}
}
@@ -145,12 +144,16 @@ impl ENFA {
})
.collect();
- // inlining
+ // clear epsilons
+ for s in self.states.iter_mut() {
+ s.trans.retain(|t| !t.is_epsilon());
+ }
+
+ // inline real transitions
for i in 0..self.states.len() {
for &k in includes[i].iter() {
let new = self.states[k].trans.clone();
self.states[i].trans.extend(new);
- self.states[i].epsilon_trans.clear();
if self.states[k].accept {
self.states[i].accept = true;
}
@@ -162,10 +165,7 @@ impl ENFA {
let mut used = vec![false; self.states.len()];
used[0] = true;
for s in self.states.iter() {
- for &i in s.epsilon_trans.iter() {
- used[i] = true;
- }
- for &(_, i) in s.trans.iter() {
+ for i in s.reachable_states() {
used[i] = true;
}
}
@@ -184,18 +184,7 @@ impl ENFA {
}
}
for s in self.states.iter_mut() {
- s.epsilon_trans = s
- .epsilon_trans
- .clone()
- .into_iter()
- .map(|i| remap[i])
- .collect();
- s.trans = s
- .trans
- .clone()
- .into_iter()
- .map(|(c, i)| (c, remap[i]))
- .collect();
+ s.remap(|i| remap[i]);
}
}
@@ -236,8 +225,8 @@ impl ENFA {
let mut states = vec![EState::start()];
states.append(&mut self.shift(1));
let len = states.len();
- states[0].epsilon_trans = set![1, len];
- states[len - 1].epsilon_trans = set![0, len];
+ states[0].set_epsilon_transitions([Transition::epsilon(1), Transition::epsilon(len)]);
+ states[len - 1].set_epsilon_transitions([Transition::epsilon(0), Transition::epsilon(len)]);
states.push(EState::terminal());
Self { states }
}
@@ -253,7 +242,9 @@ impl ENFA {
let mut repped = self.repeat(x);
assert_eq!(repped.states.len(), x * len);
for i in 1..=x {
- repped.states[0].epsilon_trans.insert(i * len - 1);
+ repped.states[0]
+ .trans
+ .insert(Transition::epsilon(i * len - 1));
}
repped
}
@@ -270,7 +261,8 @@ impl ENFA {
let len = states.len();
let mut ns = nfa.shift(len);
if let Some(n) = states.last_mut() {
- n.epsilon_trans = set![len];
+ n.trans.retain(|t| t.consumes.is_some());
+ n.trans.insert(Transition::epsilon(len));
}
states.append(&mut ns);
}
@@ -287,11 +279,13 @@ impl std::fmt::Debug for ENFA {
writeln!(f, "NFA {{")?;
for (i, s) in self.states.iter().enumerate() {
write!(f, " {i}: ")?;
- for k in s.epsilon_trans.iter() {
- write!(f, "~>{k} ")?;
- }
- for (c, k) in s.trans.iter() {
- write!(f, "{c:?}=>{k} ")?;
+ for t in s.trans.iter() {
+ let k = t.to;
+ if let Some(c) = t.consumes {
+ write!(f, "{c:?}=>{k} ")?;
+ } else {
+ write!(f, "~>{k} ")?;
+ }
}
if s.accept {
write!(f, "accept")?;
@@ -304,10 +298,38 @@ impl std::fmt::Debug for ENFA {
pub type StateId = usize;
+#[derive(Debug, Clone, Hash, PartialEq, Eq)]
+pub struct Transition {
+ to: StateId,
+ consumes: Option<ByteRange>,
+}
+
+impl Transition {
+ fn new(consumes: ByteRange, to: StateId) -> Self {
+ let consumes = Some(consumes);
+ Self { to, consumes }
+ }
+
+ fn epsilon(to: StateId) -> Self {
+ Self { to, consumes: None }
+ }
+
+ fn remap(&mut self, mut f: impl FnMut(StateId) -> StateId) {
+ self.to = f(self.to);
+ }
+
+ fn reachable_states(&self) -> impl Iterator<Item = StateId> {
+ [self.to].into_iter()
+ }
+
+ pub fn is_epsilon(&self) -> bool {
+ self.consumes.is_none()
+ }
+}
+
#[derive(Debug, Clone)]
pub struct EState {
- pub trans: HashSet<(ByteRange, StateId)>,
- pub epsilon_trans: HashSet<StateId>,
+ pub trans: HashSet<Transition>,
pub accept: bool,
}
@@ -315,17 +337,39 @@ impl EState {
fn start() -> Self {
Self {
trans: HashSet::new(),
- epsilon_trans: HashSet::new(),
accept: false,
}
}
fn terminal() -> Self {
Self {
trans: HashSet::new(),
- epsilon_trans: HashSet::new(),
accept: true,
}
}
+
+ fn set_epsilon_transitions(&mut self, trans: impl IntoIterator<Item = Transition>) {
+ self.trans.retain(|t| t.consumes.is_some());
+ for transition in trans.into_iter() {
+ assert!(transition.consumes.is_none());
+ self.trans.insert(transition);
+ }
+ }
+
+ fn remap(&mut self, mut f: impl FnMut(StateId) -> StateId) {
+ self.trans = self
+ .trans
+ .iter()
+ .cloned()
+ .map(|mut t| {
+ t.remap(&mut f);
+ t
+ })
+ .collect();
+ }
+
+ fn reachable_states(&self) -> impl Iterator<Item = StateId> {
+ self.trans.iter().flat_map(|t| t.reachable_states())
+ }
}
#[derive(Debug)]
@@ -342,8 +386,7 @@ impl TryFrom<Pattern> for ENFA {
Pattern::Range(c1, c2) => Self {
states: vec![
EState {
- trans: set![(ByteRange::new_range(c1, c2), 1)],
- epsilon_trans: set![],
+ trans: set![Transition::new(ByteRange::new_range(c1, c2), 1)],
accept: false,
},
EState::terminal(),
@@ -358,14 +401,14 @@ impl TryFrom<Pattern> for ENFA {
let mut ends = vec![];
for nfa in nfas.into_iter() {
let len = states.len();
- states[0].epsilon_trans.insert(len);
+ states[0].trans.insert(Transition::epsilon(len));
states.append(&mut (nfa.shift(len)));
ends.push(states.len() - 1);
}
states.push(EState::terminal());
for end in ends.into_iter() {
let last = states.len() - 1;
- states[end].epsilon_trans.insert(last);
+ states[end].trans.insert(Transition::epsilon(last));
}
Self { states }
}