diff options
| author | Jonas Maier <jonas@x77.dev> | 2026-05-31 19:21:44 +0200 |
|---|---|---|
| committer | Jonas Maier <jonas@x77.dev> | 2026-05-31 19:21:44 +0200 |
| commit | 81759dd51eb1f6f9a7dc8af9b2b8126ff7dfab24 (patch) | |
| tree | af451a170b30e05148088d477b725023d02c505d /src/parse/regex/byte_range.rs | |
| parent | 08f3af622cc3e7b3f85a60c6ffe83d9d70e9dc02 (diff) | |
| download | pish-81759dd51eb1f6f9a7dc8af9b2b8126ff7dfab24.tar.gz | |
regex based case statements
Diffstat (limited to 'src/parse/regex/byte_range.rs')
| -rw-r--r-- | src/parse/regex/byte_range.rs | 179 |
1 files changed, 179 insertions, 0 deletions
diff --git a/src/parse/regex/byte_range.rs b/src/parse/regex/byte_range.rs new file mode 100644 index 0000000..1ca6d8f --- /dev/null +++ b/src/parse/regex/byte_range.rs @@ -0,0 +1,179 @@ +use std::ops::RangeInclusive; + +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub struct ByteRange { + /// inclusive + from: u8, + /// inclusive + to: u8, +} + +impl From<RangeInclusive<u8>> for ByteRange { + fn from(value: RangeInclusive<u8>) -> Self { + Self::new_range(*value.start(), *value.end()) + } +} + +impl ByteRange { + pub fn new_range(from: u8, to: u8) -> Self { + assert!(from <= to); + Self { from, to } + } + + #[cfg(test)] + pub fn new_single(c: u8) -> Self { + Self::new_range(c, c) + } + + pub fn contains(&self, c: u8) -> bool { + self.from <= c && c <= self.to + } + + pub fn overlaps(&self, other: Self) -> bool { + self.from.max(other.from) <= self.to.min(other.to) + } + + pub fn non_overlapping(sets: Vec<ByteRange>) -> Vec<ByteRange> { + let begins = sets.iter().map(|cs| (cs.from, 1)); + let ends = sets.iter().map(|cs| (cs.to, 2)); + let mut edges: Vec<_> = begins.chain(ends).collect(); + edges.sort(); + edges.iter_mut().for_each(|c| { + if c.1 == 2 { + c.1 = -1; + } + }); + + let mut last = None; + let mut depth = 0; + let mut out = Vec::new(); + + for (mut loc, delta) in edges { + if let Some(last) = last { + if last <= loc { + out.push(ByteRange::new_range(last, loc)); + loc = loc + 1; + } + } + + depth += delta; + + if depth > 0 { + last = Some(loc); + } else { + last = None; + } + } + + out + } +} + +impl std::fmt::Debug for ByteRange { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if self.from == self.to { + write!(f, "{}", [self.from].escape_ascii()) + } else { + write!( + f, + "{}-{}", + [self.from].escape_ascii(), + [self.to].escape_ascii() + ) + } + } +} + +#[cfg(test)] +mod non_overlapping_tests { + use std::ops::RangeInclusive; + + use super::ByteRange; + + fn middle(r: ByteRange) -> u8 { + let a = r.from as u8; + let b = r.to as u8; + (a + (b - a) / 2) as u8 + } + + fn prev(c: u8) -> u8 { + c - 1 + } + + fn next(c: u8) -> u8 { + c + 1 + } + + fn run(ranges: Vec<RangeInclusive<u8>>) { + let ranges1: Vec<ByteRange> = ranges.into_iter().map(Into::into).collect(); + let ranges2 = ByteRange::non_overlapping(ranges1.clone()); + + let r1 = |c| ranges1.iter().any(|cr| cr.contains(c)); + let r2 = |c| ranges2.iter().any(|cr| cr.contains(c)); + + for &range in ranges1.iter() { + assert!(r1(range.from)); + assert!(r1(range.to)); + assert!(r1(middle(range))); + + assert!(r2(range.from)); + assert!(r2(range.to)); + assert!(r2(middle(range))); + + assert_eq!(r1(prev(range.from)), r2(prev(range.from))); + assert_eq!(r1(next(range.from)), r2(next(range.from))); + } + + for i in 0..ranges2.len() { + for j in 0..i { + assert!( + !ranges2[i].overlaps(ranges2[j]), + "{i} and {j} overlap: {:?}, {:?}", + ranges2[i], + ranges2[j] + ); + } + } + } + + #[test] + fn overlap_correct() { + assert!(ByteRange::new_range(b'a', b'g').overlaps(ByteRange::new_single(b'f'))); + assert!(!ByteRange::new_range(b'a', b'g').overlaps(ByteRange::new_single(b'h'))); + } + + #[test] + fn empty() { + run(vec![]); + } + + #[test] + fn singleton() { + run(vec![b'0'..=b'9']); + } + + #[test] + fn contained1() { + run(vec![b'0'..=b'9', b'5'..=b'6']); + } + + #[test] + fn contained2() { + run(vec![b'5'..=b'6', b'0'..=b'9']); + } + + #[test] + fn overlap2() { + run(vec![b'1'..=b'6', b'4'..=b'9']) + } + + #[test] + fn overlap3() { + run(vec![b'a'..=b'f', b'd'..=b'j', b'g'..=b'm']) + } + + #[test] + fn overlap4() { + run(vec![b'a'..=b'f', b'd'..=b'j', b'g'..=b'm', b'k'..=b'q']) + } +} |
