aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJonas Maier <jonas@x77.dev>2026-06-02 21:31:50 +0200
committerJonas Maier <jonas@x77.dev>2026-06-02 21:31:50 +0200
commit5647a7645739fa2aa2dea4fe059b4efe84a278bc (patch)
treeb3471284c623f917037ba3e8379081afc3ca98de
parent5caaf765648b12a8b6002633f849f7fc4f1efc21 (diff)
downloadpish-5647a7645739fa2aa2dea4fe059b4efe84a278bc.tar.gz
seems to work
-rw-r--r--src/parse/regex/byte_range.rs84
-rw-r--r--src/parse/regex/dfa.rs14
-rw-r--r--src/parse/regex/enfa.rs2
-rw-r--r--test-cases/case2_lookahead/script.sh12
4 files changed, 73 insertions, 39 deletions
diff --git a/src/parse/regex/byte_range.rs b/src/parse/regex/byte_range.rs
index 86b2c2b..0151f0c 100644
--- a/src/parse/regex/byte_range.rs
+++ b/src/parse/regex/byte_range.rs
@@ -16,7 +16,7 @@ impl From<RangeInclusive<u8>> for ByteRange {
impl ByteRange {
pub fn new_range(from: u8, to: u8) -> Self {
- assert!(from <= to);
+ assert!(from <= to, "{from} <= {to}");
Self { from, to }
}
@@ -33,42 +33,68 @@ impl ByteRange {
self.from.max(other.from) <= self.to.min(other.to)
}
- pub fn non_overlapping(sets: Vec<ByteRange>) -> Vec<ByteRange> {
- let begins = sets.iter().map(|cs| (cs.from, 1));
- let ends = sets.iter().map(|cs| (cs.to, 2));
- let mut edges: Vec<_> = begins.chain(ends).collect();
- edges.sort();
- edges.iter_mut().for_each(|c| {
- if c.1 == 2 {
- c.1 = -1;
+ pub fn split_to_disjoint(ranges: Vec<ByteRange>) -> Vec<ByteRange> {
+ if ranges.is_empty() {
+ return vec![];
+ }
+
+ let mut points: Vec<u8> = Vec::new();
+ for r in &ranges {
+ points.push(r.from);
+ if r.to != u8::MAX {
+ points.push(r.to + 1);
}
- });
-
- let mut last = None;
- let mut depth = 0;
+ }
+
+ points.sort_unstable();
+ points.dedup();
+
let mut out = Vec::new();
-
- for (mut loc, delta) in edges {
- if let Some(last) = last
- && last <= loc
- {
- out.push(ByteRange::new_range(last, loc));
- loc += 1;
+
+ for window in points.windows(2) {
+ let start = window[0];
+ let end_exclusive = window[1];
+
+ if start >= end_exclusive {
+ continue;
}
-
- depth += delta;
-
- if depth > 0 {
- last = Some(loc);
- } else {
- last = None;
+
+ let mut active = false;
+
+ for r in &ranges {
+ if r.from <= start && start <= r.to {
+ active = true;
+ break;
+ }
+ }
+
+ if active {
+ out.push(ByteRange {
+ from: start,
+ to: end_exclusive - 1,
+ });
}
}
-
+
out
}
}
+#[test]
+fn byterange_test() {
+ assert_eq!(
+ ByteRange::split_to_disjoint(vec![
+ ByteRange::new_range(b'a', b'z'),
+ ByteRange::new_single(b'm')
+ ]),
+ vec![
+ ByteRange::new_range(b'a', b'l'),
+ ByteRange::new_single(b'm'),
+ ByteRange::new_range(b'n', b'z'),
+ ]
+ );
+}
+
impl std::fmt::Debug for ByteRange {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if self.from == self.to {
@@ -106,7 +132,7 @@ mod non_overlapping_tests {
fn run(ranges: Vec<RangeInclusive<u8>>) {
let ranges1: Vec<ByteRange> = ranges.into_iter().map(Into::into).collect();
- let ranges2 = ByteRange::non_overlapping(ranges1.clone());
+ let ranges2 = ByteRange::split_to_disjoint(ranges1.clone());
let r1 = |c| ranges1.iter().any(|cr| cr.contains(c));
let r2 = |c| ranges2.iter().any(|cr| cr.contains(c));
diff --git a/src/parse/regex/dfa.rs b/src/parse/regex/dfa.rs
index 78888a2..243176d 100644
--- a/src/parse/regex/dfa.rs
+++ b/src/parse/regex/dfa.rs
@@ -3,7 +3,7 @@ use std::collections::HashMap;
use super::{
byte_range::ByteRange,
- enfa::{ENFA, Epsilon, MultiState, Resolved},
+ enfa::{ENFA, MultiState},
};
pub type StateId = usize;
@@ -60,10 +60,12 @@ impl DFA {
}
}
-impl From<ENFA<Resolved>> for DFA {
- fn from(mut nfa: ENFA<Resolved>) -> Self {
+impl From<ENFA> for DFA {
+ fn from(mut nfa: ENFA) -> Self {
nfa.remove_unreachable();
+ println!("{nfa:?}");
+
let mut multi_states = nfa.all_multi_states();
multi_states.insert(nfa.void_multi_state());
let mut len = 0;
@@ -101,9 +103,3 @@ impl From<ENFA<Resolved>> for DFA {
}
}
}
-
-impl From<ENFA<Epsilon>> for DFA {
- fn from(value: ENFA<Epsilon>) -> Self {
- Self::from(value.resolve_epsilon())
- }
-}
diff --git a/src/parse/regex/enfa.rs b/src/parse/regex/enfa.rs
index 3cac2dd..fea2976 100644
--- a/src/parse/regex/enfa.rs
+++ b/src/parse/regex/enfa.rs
@@ -336,7 +336,7 @@ impl<'a> MultiState<'a> {
for t in self.threads.iter() {
t.possible_transitions(self.nfa, &mut |x| vec.push(x));
}
- vec = ByteRange::non_overlapping(vec);
+ vec = ByteRange::split_to_disjoint(vec);
vec.sort();
vec.dedup();
vec.shrink_to_fit();
diff --git a/test-cases/case2_lookahead/script.sh b/test-cases/case2_lookahead/script.sh
index 1a2226b..a77eb85 100644
--- a/test-cases/case2_lookahead/script.sh
+++ b/test-cases/case2_lookahead/script.sh
@@ -36,3 +36,15 @@ match x1 aa no
match x1 ax no
match x1 axx no
match x1 aax no
+
+fun x2 {
+ case $1 {
+ x(?=foo)(?=...bar).* { echo yes }
+ .* { echo no }
+ }
+}
+match x2 xfoobar yes
+match x2 xfoobar_some_more_stuff yes
+match x2 x___bar no
+match x2 xfoo___ no
+match x2 xfoo no