aboutsummaryrefslogtreecommitdiffstats
path: root/src/parse
diff options
context:
space:
mode:
Diffstat (limited to 'src/parse')
-rw-r--r--src/parse/regex/enfa.rs259
1 files changed, 231 insertions, 28 deletions
diff --git a/src/parse/regex/enfa.rs b/src/parse/regex/enfa.rs
index 76b5e63..b702b40 100644
--- a/src/parse/regex/enfa.rs
+++ b/src/parse/regex/enfa.rs
@@ -31,10 +31,216 @@ pub struct ENFA<S: Stage> {
pub states: Vec<EState<S>>,
}
+#[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
+struct Thread {
+ own_state: StateId,
+ positives: Vec<Thread>,
+ negatives: Vec<Thread>,
+}
+
+fn cartesian_product<T: Clone>(x: Vec<Vec<T>>) -> Vec<Vec<T>> {
+ let mut result = vec![Vec::new()];
+
+ for xs in x {
+ let mut next = Vec::new();
+
+ for prefix in &result {
+ for item in &xs {
+ let mut v = prefix.clone();
+ v.push(item.clone());
+ next.push(v);
+ }
+ }
+
+ result = next;
+ }
+
+ result
+}
+
+#[cfg(test)]
+mod product_tests {
+ use super::cartesian_product;
+
+ #[test]
+ fn basic_case() {
+ let x = vec![vec![1, 2], vec![10, 20]];
+
+ let out = cartesian_product(x);
+
+ assert_eq!(
+ out,
+ vec![vec![1, 10], vec![1, 20], vec![2, 10], vec![2, 20],]
+ );
+ }
+
+ #[test]
+ fn single_dimension() {
+ let x = vec![vec![1, 2, 3]];
+
+ let out = cartesian_product(x);
+
+ assert_eq!(out, vec![vec![1], vec![2], vec![3],]);
+ }
+
+ #[test]
+ fn empty_outer_vector() {
+ let x: Vec<Vec<i32>> = vec![];
+
+ let out = cartesian_product(x);
+
+ // One empty combination.
+ assert_eq!(out, vec![vec![]]);
+ }
+
+ #[test]
+ fn empty_inner_vector() {
+ let x = vec![vec![1, 2], vec![], vec![3, 4]];
+
+ let out = cartesian_product(x);
+
+ assert!(out.is_empty());
+ }
+
+ #[test]
+ fn output_size_matches_product() {
+ let x = vec![vec![1, 2, 3], vec![4, 5], vec![6, 7, 8, 9]];
+
+ let out = cartesian_product(x);
+
+ assert_eq!(out.len(), 3 * 2 * 4);
+ }
+
+ #[test]
+ fn every_output_has_correct_length() {
+ let x = vec![vec!['a', 'b'], vec!['x', 'y', 'z'], vec!['0']];
+
+ let out = cartesian_product(x);
+
+ assert!(out.iter().all(|v| v.len() == 3));
+ }
+
+ #[test]
+ fn works_with_strings() {
+ let x = vec![
+ vec!["a".to_string(), "b".to_string()],
+ vec!["x".to_string()],
+ ];
+
+ let out = cartesian_product(x);
+
+ assert_eq!(
+ out,
+ vec![
+ vec!["a".to_string(), "x".to_string()],
+ vec!["b".to_string(), "x".to_string()],
+ ]
+ );
+ }
+}
+
+impl Thread {
+ fn new(
+ enfa: &ENFA<Resolved>,
+ own_state: StateId,
+ mut positives: Vec<Thread>,
+ mut negatives: Vec<Thread>,
+ ) -> Option<Self> {
+ positives.sort();
+ positives.dedup();
+ positives.retain(|t| !t.accept(enfa));
+ positives.shrink_to_fit();
+
+ negatives.sort();
+ negatives.dedup();
+ negatives.shrink_to_fit();
+
+ if negatives.iter().any(|t| t.accept(enfa)) {
+ return None;
+ }
+
+ Some(Self {
+ own_state,
+ positives,
+ negatives,
+ })
+ }
+
+ fn accept(&self, enfa: &ENFA<Resolved>) -> bool {
+ let pos = self.positives.iter().all(|t| t.accept(enfa));
+ let neg = self.negatives.iter().all(|t| !t.accept(enfa));
+ let this = match enfa.states[self.own_state].accept {
+ Acceptance::Accept => true,
+ Acceptance::Assertion => true,
+ Acceptance::NotYet => false,
+ };
+ pos && neg && this
+ }
+
+ fn possible_transitions(&self, enfa: &ENFA<Resolved>, f: &mut impl FnMut(ByteRange)) {
+ for t in enfa.states[self.own_state].trans.iter() {
+ f(t.consumes);
+ }
+ for t in self.positives.iter() {
+ t.possible_transitions(enfa, f);
+ }
+ for t in self.negatives.iter() {
+ t.possible_transitions(enfa, f);
+ }
+ }
+
+ fn transition(&self, enfa: &ENFA<Resolved>, ch: ByteRange) -> Vec<Self> {
+ // if any of our negatives currently accept, we must kill this thread rn.
+ if self.negatives.iter().any(|t| t.accept(enfa)) {
+ return Vec::new();
+ }
+
+ let pos: Vec<Vec<Self>> = self
+ .positives
+ .iter()
+ .map(|t| t.transition(enfa, ch))
+ .collect();
+ let neg: Vec<Vec<Self>> = self
+ .negatives
+ .iter()
+ .map(|t| t.transition(enfa, ch))
+ .collect();
+
+ let pos = cartesian_product(pos);
+ let neg = cartesian_product(neg);
+
+ let mut out = Vec::new();
+
+ for p in pos {
+ for n in neg.clone() {
+ for transition in enfa.states[self.own_state].trans.iter() {
+ if transition.consumes.overlaps(ch) {
+ let mut p = p.clone();
+ let mut n = n.clone();
+ for a in transition.assert.iter() {
+ if let Some(thread) = Thread::new(enfa, a.to, Vec::new(), Vec::new()) {
+ match a.polarity {
+ LookPolarity::Positive => p.push(thread),
+ LookPolarity::Negative => n.push(thread),
+ }
+ }
+ }
+ if let Some(thread) = Thread::new(enfa, transition.to, p, n) {
+ out.push(thread);
+ }
+ }
+ }
+ }
+ }
+
+ out
+ }
+}
+
#[derive(Clone)]
pub struct MultiState<'a> {
nfa: &'a ENFA<Resolved>,
- states: Vec<StateId>,
+ threads: Vec<Thread>,
accept: bool,
hash: u64,
}
@@ -42,7 +248,7 @@ pub struct MultiState<'a> {
impl<'a> PartialEq for MultiState<'a> {
fn eq(&self, other: &Self) -> bool {
(self.nfa as *const ENFA<Resolved> as u64) == (other.nfa as *const ENFA<Resolved> as u64)
- && self.states == other.states
+ && self.threads == other.threads
&& self.accept == other.accept
&& self.hash == other.hash
}
@@ -50,21 +256,19 @@ impl<'a> PartialEq for MultiState<'a> {
impl<'a> Eq for MultiState<'a> {}
impl<'a> MultiState<'a> {
- pub fn new(nfa: &'a ENFA<Resolved>, mut states: Vec<StateId>) -> Self {
- states.sort();
- states.dedup();
- states.shrink_to_fit();
+ fn new(nfa: &'a ENFA<Resolved>, mut threads: Vec<Thread>) -> Self {
+ threads.sort();
+ threads.dedup();
+ threads.shrink_to_fit();
- let accept = states
- .iter()
- .any(|&x| nfa.states[x].accept == Acceptance::Accept);
+ let accept = threads.iter().any(|t| t.accept(nfa));
let mut hasher = DefaultHasher::new();
- states.hash(&mut hasher);
+ threads.hash(&mut hasher);
let hash = hasher.finish();
Self {
nfa,
- states,
+ threads,
accept,
hash,
}
@@ -72,11 +276,10 @@ impl<'a> MultiState<'a> {
/// all the chars that will make an interesting transition
pub fn possible_transitions(&self) -> Vec<ByteRange> {
- let mut vec: Vec<_> = self
- .states
- .iter()
- .flat_map(|&i| self.nfa.states[i].trans.iter().map(|x| x.consumes))
- .collect();
+ let mut vec = Vec::new();
+ for t in self.threads.iter() {
+ t.possible_transitions(self.nfa, &mut |x| vec.push(x));
+ }
vec = ByteRange::non_overlapping(vec);
vec.sort();
vec.dedup();
@@ -86,17 +289,9 @@ impl<'a> MultiState<'a> {
pub fn transition(&self, ch: ByteRange) -> Self {
let new_states = self
- .states
+ .threads
.iter()
- .flat_map(|&s| {
- self.nfa.states[s].trans.iter().filter_map(|t| {
- if t.consumes.overlaps(ch) {
- Some(t.to)
- } else {
- None
- }
- })
- })
+ .flat_map(|t| t.transition(self.nfa, ch))
.collect();
Self::new(self.nfa, new_states)
@@ -169,7 +364,12 @@ impl<S: Stage> ENFA<S> {
}
impl ENFA<Epsilon> {
- fn epsilon_dfs(&self, i: StateId, visited: &mut [Option<Vec<Assertion>>], path: Vec<Assertion>) {
+ fn epsilon_dfs(
+ &self,
+ i: StateId,
+ visited: &mut [Option<Vec<Assertion>>],
+ path: Vec<Assertion>,
+ ) {
if visited[i].is_some() {
return;
}
@@ -278,7 +478,10 @@ impl ENFA<Epsilon> {
impl ENFA<Resolved> {
pub fn start_multi_state<'a>(&'a self) -> MultiState<'a> {
- MultiState::new(self, vec![0])
+ MultiState::new(
+ self,
+ vec![Thread::new(self, 0, Vec::new(), Vec::new()).unwrap()],
+ )
}
pub fn void_multi_state<'a>(&'a self) -> MultiState<'a> {