aboutsummaryrefslogtreecommitdiffstats
path: root/src/parse/regex/byte_range.rs
diff options
context:
space:
mode:
authorJonas Maier <jonas@x77.dev>2026-06-06 12:15:52 +0200
committerJonas Maier <jonas@x77.dev>2026-06-06 12:15:52 +0200
commit53980774c327675e886179c0a2c140744dcf9b95 (patch)
treeca1fdcc9938fce2c10c51e0a51659c6ba38ac5ba /src/parse/regex/byte_range.rs
parent75e0c29cf91ddc6299c14a94a038c3e3df3d2805 (diff)
downloadpish-53980774c327675e886179c0a2c140744dcf9b95.tar.gz
special cased regex for performance
Diffstat (limited to 'src/parse/regex/byte_range.rs')
-rw-r--r--src/parse/regex/byte_range.rs204
1 files changed, 0 insertions, 204 deletions
diff --git a/src/parse/regex/byte_range.rs b/src/parse/regex/byte_range.rs
deleted file mode 100644
index b7642c1..0000000
--- a/src/parse/regex/byte_range.rs
+++ /dev/null
@@ -1,204 +0,0 @@
-use std::ops::RangeInclusive;
-
-#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
-pub struct ByteRange {
- /// inclusive
- from: u8,
- /// inclusive
- to: u8,
-}
-
-impl From<RangeInclusive<u8>> for ByteRange {
- fn from(value: RangeInclusive<u8>) -> Self {
- Self::new_range(*value.start(), *value.end())
- }
-}
-
-impl ByteRange {
- pub fn new_range(from: u8, to: u8) -> Self {
- assert!(from <= to, "{from} <= {to}");
- Self { from, to }
- }
-
- pub fn new_single(c: u8) -> Self {
- Self::new_range(c, c)
- }
-
- pub fn contains(&self, c: u8) -> bool {
- self.from <= c && c <= self.to
- }
-
- pub fn overlaps(&self, other: Self) -> bool {
- self.from.max(other.from) <= self.to.min(other.to)
- }
-
- pub fn split_to_disjoint(ranges: Vec<ByteRange>) -> Vec<ByteRange> {
- if ranges.is_empty() {
- return vec![];
- }
-
- let mut points: Vec<u8> = Vec::new();
- for r in &ranges {
- points.push(r.from);
- if r.to != u8::MAX {
- points.push(r.to + 1);
- }
- }
-
- points.sort_unstable();
- points.dedup();
-
- let mut out = Vec::new();
-
- for window in points.windows(2) {
- let start = window[0];
- let end_exclusive = window[1];
-
- if start >= end_exclusive {
- continue;
- }
-
- let mut active = false;
-
- for r in &ranges {
- if r.from <= start && start <= r.to {
- active = true;
- break;
- }
- }
-
- if active {
- out.push(ByteRange {
- from: start,
- to: end_exclusive - 1,
- });
- }
- }
-
- out
- }
-}
-
-#[test]
-fn byterange_test() {
- assert_eq!(
- ByteRange::split_to_disjoint(vec![
- ByteRange::new_range(b'a', b'z'),
- ByteRange::new_single(b'm')
- ]),
- vec![
- ByteRange::new_range(b'a', b'l'),
- ByteRange::new_single(b'm'),
- ByteRange::new_range(b'n', b'z'),
- ]
- );
-}
-
-impl std::fmt::Debug for ByteRange {
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
- if self.from == self.to {
- write!(f, "{}", [self.from].escape_ascii())
- } else {
- write!(
- f,
- "{}-{}",
- [self.from].escape_ascii(),
- [self.to].escape_ascii()
- )
- }
- }
-}
-
-#[cfg(test)]
-mod non_overlapping_tests {
- use std::ops::RangeInclusive;
-
- use super::ByteRange;
-
- fn middle(r: ByteRange) -> u8 {
- let a = r.from as u8;
- let b = r.to as u8;
- (a + (b - a) / 2) as u8
- }
-
- fn prev(c: u8) -> u8 {
- c - 1
- }
-
- fn next(c: u8) -> u8 {
- c + 1
- }
-
- fn run(ranges: Vec<RangeInclusive<u8>>) {
- let ranges1: Vec<ByteRange> = ranges.into_iter().map(Into::into).collect();
- let ranges2 = ByteRange::split_to_disjoint(ranges1.clone());
-
- let r1 = |c| ranges1.iter().any(|cr| cr.contains(c));
- let r2 = |c| ranges2.iter().any(|cr| cr.contains(c));
-
- for &range in ranges1.iter() {
- assert!(r1(range.from));
- assert!(r1(range.to));
- assert!(r1(middle(range)));
-
- assert!(r2(range.from));
- assert!(r2(range.to));
- assert!(r2(middle(range)));
-
- assert_eq!(r1(prev(range.from)), r2(prev(range.from)));
- assert_eq!(r1(next(range.from)), r2(next(range.from)));
- }
-
- for i in 0..ranges2.len() {
- for j in 0..i {
- assert!(
- !ranges2[i].overlaps(ranges2[j]),
- "{i} and {j} overlap: {:?}, {:?}",
- ranges2[i],
- ranges2[j]
- );
- }
- }
- }
-
- #[test]
- fn overlap_correct() {
- assert!(ByteRange::new_range(b'a', b'g').overlaps(ByteRange::new_single(b'f')));
- assert!(!ByteRange::new_range(b'a', b'g').overlaps(ByteRange::new_single(b'h')));
- }
-
- #[test]
- fn empty() {
- run(vec![]);
- }
-
- #[test]
- fn singleton() {
- run(vec![b'0'..=b'9']);
- }
-
- #[test]
- fn contained1() {
- run(vec![b'0'..=b'9', b'5'..=b'6']);
- }
-
- #[test]
- fn contained2() {
- run(vec![b'5'..=b'6', b'0'..=b'9']);
- }
-
- #[test]
- fn overlap2() {
- run(vec![b'1'..=b'6', b'4'..=b'9'])
- }
-
- #[test]
- fn overlap3() {
- run(vec![b'a'..=b'f', b'd'..=b'j', b'g'..=b'm'])
- }
-
- #[test]
- fn overlap4() {
- run(vec![b'a'..=b'f', b'd'..=b'j', b'g'..=b'm', b'k'..=b'q'])
- }
-}