aboutsummaryrefslogtreecommitdiffstats
path: root/src/parse/regex/byte_range.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/parse/regex/byte_range.rs')
-rw-r--r--src/parse/regex/byte_range.rs84
1 files changed, 55 insertions, 29 deletions
diff --git a/src/parse/regex/byte_range.rs b/src/parse/regex/byte_range.rs
index 86b2c2b..0151f0c 100644
--- a/src/parse/regex/byte_range.rs
+++ b/src/parse/regex/byte_range.rs
@@ -16,7 +16,7 @@ impl From<RangeInclusive<u8>> for ByteRange {
impl ByteRange {
pub fn new_range(from: u8, to: u8) -> Self {
- assert!(from <= to);
+ assert!(from <= to, "{from} <= {to}");
Self { from, to }
}
@@ -33,42 +33,68 @@ impl ByteRange {
self.from.max(other.from) <= self.to.min(other.to)
}
- pub fn non_overlapping(sets: Vec<ByteRange>) -> Vec<ByteRange> {
- let begins = sets.iter().map(|cs| (cs.from, 1));
- let ends = sets.iter().map(|cs| (cs.to, 2));
- let mut edges: Vec<_> = begins.chain(ends).collect();
- edges.sort();
- edges.iter_mut().for_each(|c| {
- if c.1 == 2 {
- c.1 = -1;
+ pub fn split_to_disjoint(ranges: Vec<ByteRange>) -> Vec<ByteRange> {
+ if ranges.is_empty() {
+ return vec![];
+ }
+
+ let mut points: Vec<u8> = Vec::new();
+ for r in &ranges {
+ points.push(r.from);
+ if r.to != u8::MAX {
+ points.push(r.to + 1);
}
- });
-
- let mut last = None;
- let mut depth = 0;
+ }
+
+ points.sort_unstable();
+ points.dedup();
+
let mut out = Vec::new();
-
- for (mut loc, delta) in edges {
- if let Some(last) = last
- && last <= loc
- {
- out.push(ByteRange::new_range(last, loc));
- loc += 1;
+
+ for window in points.windows(2) {
+ let start = window[0];
+ let end_exclusive = window[1];
+
+ if start >= end_exclusive {
+ continue;
}
-
- depth += delta;
-
- if depth > 0 {
- last = Some(loc);
- } else {
- last = None;
+
+ let mut active = false;
+
+ for r in &ranges {
+ if r.from <= start && start <= r.to {
+ active = true;
+ break;
+ }
+ }
+
+ if active {
+ out.push(ByteRange {
+ from: start,
+ to: end_exclusive - 1,
+ });
}
}
-
+
out
}
}
+#[test]
+fn byterange_test() {
+ assert_eq!(
+ ByteRange::split_to_disjoint(vec![
+ ByteRange::new_range(b'a', b'z'),
+ ByteRange::new_single(b'm')
+ ]),
+ vec![
+ ByteRange::new_range(b'a', b'l'),
+ ByteRange::new_single(b'm'),
+ ByteRange::new_range(b'n', b'z'),
+ ]
+ );
+}
+
impl std::fmt::Debug for ByteRange {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if self.from == self.to {
@@ -106,7 +132,7 @@ mod non_overlapping_tests {
fn run(ranges: Vec<RangeInclusive<u8>>) {
let ranges1: Vec<ByteRange> = ranges.into_iter().map(Into::into).collect();
- let ranges2 = ByteRange::non_overlapping(ranges1.clone());
+ let ranges2 = ByteRange::split_to_disjoint(ranges1.clone());
let r1 = |c| ranges1.iter().any(|cr| cr.contains(c));
let r2 = |c| ranges2.iter().any(|cr| cr.contains(c));