diff options
Diffstat (limited to 'src/parse/regex/byte_range.rs')
| -rw-r--r-- | src/parse/regex/byte_range.rs | 204 |
1 files changed, 0 insertions, 204 deletions
diff --git a/src/parse/regex/byte_range.rs b/src/parse/regex/byte_range.rs deleted file mode 100644 index b7642c1..0000000 --- a/src/parse/regex/byte_range.rs +++ /dev/null @@ -1,204 +0,0 @@ -use std::ops::RangeInclusive; - -#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] -pub struct ByteRange { - /// inclusive - from: u8, - /// inclusive - to: u8, -} - -impl From<RangeInclusive<u8>> for ByteRange { - fn from(value: RangeInclusive<u8>) -> Self { - Self::new_range(*value.start(), *value.end()) - } -} - -impl ByteRange { - pub fn new_range(from: u8, to: u8) -> Self { - assert!(from <= to, "{from} <= {to}"); - Self { from, to } - } - - pub fn new_single(c: u8) -> Self { - Self::new_range(c, c) - } - - pub fn contains(&self, c: u8) -> bool { - self.from <= c && c <= self.to - } - - pub fn overlaps(&self, other: Self) -> bool { - self.from.max(other.from) <= self.to.min(other.to) - } - - pub fn split_to_disjoint(ranges: Vec<ByteRange>) -> Vec<ByteRange> { - if ranges.is_empty() { - return vec![]; - } - - let mut points: Vec<u8> = Vec::new(); - for r in &ranges { - points.push(r.from); - if r.to != u8::MAX { - points.push(r.to + 1); - } - } - - points.sort_unstable(); - points.dedup(); - - let mut out = Vec::new(); - - for window in points.windows(2) { - let start = window[0]; - let end_exclusive = window[1]; - - if start >= end_exclusive { - continue; - } - - let mut active = false; - - for r in &ranges { - if r.from <= start && start <= r.to { - active = true; - break; - } - } - - if active { - out.push(ByteRange { - from: start, - to: end_exclusive - 1, - }); - } - } - - out - } -} - -#[test] -fn byterange_test() { - assert_eq!( - ByteRange::split_to_disjoint(vec![ - ByteRange::new_range(b'a', b'z'), - ByteRange::new_single(b'm') - ]), - vec![ - ByteRange::new_range(b'a', b'l'), - ByteRange::new_single(b'm'), - ByteRange::new_range(b'n', b'z'), - ] - ); -} - -impl std::fmt::Debug for ByteRange { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - if self.from == self.to { - write!(f, "{}", [self.from].escape_ascii()) - } else { - write!( - f, - "{}-{}", - [self.from].escape_ascii(), - [self.to].escape_ascii() - ) - } - } -} - -#[cfg(test)] -mod non_overlapping_tests { - use std::ops::RangeInclusive; - - use super::ByteRange; - - fn middle(r: ByteRange) -> u8 { - let a = r.from as u8; - let b = r.to as u8; - (a + (b - a) / 2) as u8 - } - - fn prev(c: u8) -> u8 { - c - 1 - } - - fn next(c: u8) -> u8 { - c + 1 - } - - fn run(ranges: Vec<RangeInclusive<u8>>) { - let ranges1: Vec<ByteRange> = ranges.into_iter().map(Into::into).collect(); - let ranges2 = ByteRange::split_to_disjoint(ranges1.clone()); - - let r1 = |c| ranges1.iter().any(|cr| cr.contains(c)); - let r2 = |c| ranges2.iter().any(|cr| cr.contains(c)); - - for &range in ranges1.iter() { - assert!(r1(range.from)); - assert!(r1(range.to)); - assert!(r1(middle(range))); - - assert!(r2(range.from)); - assert!(r2(range.to)); - assert!(r2(middle(range))); - - assert_eq!(r1(prev(range.from)), r2(prev(range.from))); - assert_eq!(r1(next(range.from)), r2(next(range.from))); - } - - for i in 0..ranges2.len() { - for j in 0..i { - assert!( - !ranges2[i].overlaps(ranges2[j]), - "{i} and {j} overlap: {:?}, {:?}", - ranges2[i], - ranges2[j] - ); - } - } - } - - #[test] - fn overlap_correct() { - assert!(ByteRange::new_range(b'a', b'g').overlaps(ByteRange::new_single(b'f'))); - assert!(!ByteRange::new_range(b'a', b'g').overlaps(ByteRange::new_single(b'h'))); - } - - #[test] - fn empty() { - run(vec![]); - } - - #[test] - fn singleton() { - run(vec![b'0'..=b'9']); - } - - #[test] - fn contained1() { - run(vec![b'0'..=b'9', b'5'..=b'6']); - } - - #[test] - fn contained2() { - run(vec![b'5'..=b'6', b'0'..=b'9']); - } - - #[test] - fn overlap2() { - run(vec![b'1'..=b'6', b'4'..=b'9']) - } - - #[test] - fn overlap3() { - run(vec![b'a'..=b'f', b'd'..=b'j', b'g'..=b'm']) - } - - #[test] - fn overlap4() { - run(vec![b'a'..=b'f', b'd'..=b'j', b'g'..=b'm', b'k'..=b'q']) - } -} |
