aboutsummaryrefslogtreecommitdiffstats
path: root/src/regex/decision_tree.rs
diff options
context:
space:
mode:
authorJonas Maier <jonas@x77.dev>2026-06-06 14:29:21 +0200
committerJonas Maier <jonas@x77.dev>2026-06-06 14:29:21 +0200
commit24d41bb4daf081bb9cd63a2107b28b1878594ed3 (patch)
treee571d21edd4e20a3f7246bebf805b461c5ed7a43 /src/regex/decision_tree.rs
parentd39ed8fc77981f937c35fa84a7ff5d288d0c7181 (diff)
downloadpish-24d41bb4daf081bb9cd63a2107b28b1878594ed3.tar.gz
tried to implement a decision tree for faster dfa, but it is not faster
Diffstat (limited to 'src/regex/decision_tree.rs')
-rw-r--r--src/regex/decision_tree.rs112
1 files changed, 112 insertions, 0 deletions
diff --git a/src/regex/decision_tree.rs b/src/regex/decision_tree.rs
new file mode 100644
index 0000000..fed6e70
--- /dev/null
+++ b/src/regex/decision_tree.rs
@@ -0,0 +1,112 @@
+use std::collections::HashMap;
+
+use crate::regex::byte_range::ByteRange;
+
+pub struct DecisionTree<T> {
+ instrs: Vec<Instr>,
+ ret_vals: Vec<T>,
+}
+
+#[repr(u32)]
+#[derive(Copy, Clone)]
+enum Instr {
+ JumpIfGe(u8, u8),
+ Ret(u8),
+}
+
+impl<T: Clone> DecisionTree<T> {
+ pub fn decide(&self, val: u8) -> T {
+ let mut pc = 0;
+ loop {
+ match self.instrs[pc] {
+ Instr::JumpIfGe(pivot, rela) => {
+ if val >= pivot {
+ pc += rela as usize;
+ } else {
+ pc += 1;
+ }
+ }
+ Instr::Ret(idx) => return self.ret_vals[idx as usize].clone(),
+ }
+ }
+ }
+}
+
+impl<T: PartialEq + Clone> DecisionTree<T> {
+ fn add_retval(&mut self, searched_val: &T) -> u8 {
+ for (i, val) in self.ret_vals.iter().enumerate() {
+ if val == searched_val {
+ return i as u8;
+ }
+ }
+ self.ret_vals.push(searched_val.clone());
+ (self.ret_vals.len() - 1) as u8
+ }
+
+ fn merge(mut self, other: Self) -> Self {
+ for ins in other.instrs.into_iter() {
+ match ins {
+ Instr::JumpIfGe(_, _) => self.instrs.push(ins),
+ Instr::Ret(i) => {
+ let i = self.add_retval(&other.ret_vals[i as usize]);
+ self.instrs.push(Instr::Ret(i));
+ }
+ }
+ }
+ self
+ }
+
+ fn build_inner(ranges: &[(ByteRange, T)]) -> Self {
+ assert!(!ranges.is_empty());
+ if ranges.len() == 1 {
+ Self {
+ instrs: vec![Instr::Ret(0)],
+ ret_vals: vec![ranges[0].1.clone()],
+ }
+ } else {
+ let pivot = ranges.len() / 2;
+ let a = Self::build_inner(&ranges[..pivot]);
+ let b = Self::build_inner(&ranges[pivot..]);
+ let decider = Self {
+ instrs: vec![Instr::JumpIfGe(
+ ranges[pivot].0.lower_bound(),
+ a.instrs.len() as u8 + 1,
+ )],
+ ret_vals: Vec::new(),
+ };
+ decider.merge(a).merge(b)
+ }
+ }
+
+ pub fn build(map: HashMap<ByteRange, T>, default: T) -> Self {
+ let mut ranges: Vec<ByteRange> = map.keys().cloned().collect();
+ ranges.push(ByteRange::all());
+ let ranges = ByteRange::split_to_disjoint(ranges);
+ for i in 0..ranges.len() - 1 {
+ assert_eq!(ranges[i].upper_bound() + 1, ranges[i + 1].lower_bound());
+ }
+
+ let ranges: Vec<(ByteRange, T)> = ranges
+ .into_iter()
+ .map(|r| {
+ let maps_to = map
+ .iter()
+ .filter_map(|(x, t)| if x.overlaps(r) { Some(t.clone()) } else { None })
+ .next()
+ .unwrap_or(default.clone());
+ (r, maps_to)
+ })
+ .collect();
+
+ Self::build_inner(&ranges)
+ }
+}
+
+impl<T> Default for DecisionTree<T> {
+ fn default() -> Self {
+ Self {
+ instrs: Default::default(),
+ ret_vals: Default::default(),
+ }
+ }
+}