aboutsummaryrefslogtreecommitdiffstats
path: root/src/regex/decision_tree.rs
blob: fed6e700166d3ee63b16d3de18aea232027b4e81 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
use std::collections::HashMap;

use crate::regex::byte_range::ByteRange;

/// Maps a byte to a value of type `T` by interpreting a flat list of
/// compare-and-jump instructions (see `decide`).
pub struct DecisionTree<T> {
    /// The program; `decide` starts executing it at index 0.
    instrs: Vec<Instr>,
    /// Result table; `Instr::Ret` refers to its entries by index.
    ret_vals: Vec<T>,
}

/// One instruction of the program interpreted by `DecisionTree::decide`.
///
/// NOTE(review): `repr(u32)` selects `u32` as the discriminant type;
/// presumably meant to keep instructions compact — confirm the intended layout.
#[repr(u32)]
#[derive(Copy, Clone)]
enum Instr {
    /// `JumpIfGe(pivot, rela)`: if the input byte is `>= pivot`, advance the
    /// program counter by `rela`; otherwise continue with the next instruction.
    /// The offset is a `u8`, so one jump can skip at most 255 instructions.
    JumpIfGe(u8, u8),
    /// `Ret(idx)`: stop and yield a clone of `ret_vals[idx]`.
    Ret(u8),
}

impl<T: Clone> DecisionTree<T> {
    /// Runs the instruction list against `val` and returns a clone of the
    /// value selected by the first `Ret` instruction that is reached.
    ///
    /// # Panics
    ///
    /// Panics if the program counter leaves the instruction list (which happens
    /// immediately for a tree without instructions, e.g. one obtained from
    /// `Default`), or if a `Ret` index is outside the result table.
    pub fn decide(&self, val: u8) -> T {
        let mut cursor = 0;
        loop {
            // Every instruction either terminates or tells us how far to advance.
            let step = match self.instrs[cursor] {
                Instr::Ret(slot) => return self.ret_vals[slot as usize].clone(),
                Instr::JumpIfGe(pivot, rela) if val >= pivot => rela as usize,
                Instr::JumpIfGe(..) => 1,
            };
            cursor += step;
        }
    }
}

impl<T: PartialEq + Clone> DecisionTree<T> {
    fn add_retval(&mut self, searched_val: &T) -> u8 {
        for (i, val) in self.ret_vals.iter().enumerate() {
            if val == searched_val {
                return i as u8;
            }
        }
        self.ret_vals.push(searched_val.clone());
        (self.ret_vals.len() - 1) as u8
    }

    fn merge(mut self, other: Self) -> Self {
        for ins in other.instrs.into_iter() {
            match ins {
                Instr::JumpIfGe(_, _) => self.instrs.push(ins),
                Instr::Ret(i) => {
                    let i = self.add_retval(&other.ret_vals[i as usize]);
                    self.instrs.push(Instr::Ret(i));
                }
            }
        }
        self
    }

    fn build_inner(ranges: &[(ByteRange, T)]) -> Self {
        assert!(!ranges.is_empty());
        if ranges.len() == 1 {
            Self {
                instrs: vec![Instr::Ret(0)],
                ret_vals: vec![ranges[0].1.clone()],
            }
        } else {
            let pivot = ranges.len() / 2;
            let a = Self::build_inner(&ranges[..pivot]);
            let b = Self::build_inner(&ranges[pivot..]);
            let decider = Self {
                instrs: vec![Instr::JumpIfGe(
                    ranges[pivot].0.lower_bound(),
                    a.instrs.len() as u8 + 1,
                )],
                ret_vals: Vec::new(),
            };
            decider.merge(a).merge(b)
        }
    }

    pub fn build(map: HashMap<ByteRange, T>, default: T) -> Self {
        let mut ranges: Vec<ByteRange> = map.keys().cloned().collect();
        ranges.push(ByteRange::all());
        let ranges = ByteRange::split_to_disjoint(ranges);
        for i in 0..ranges.len() - 1 {
            assert_eq!(ranges[i].upper_bound() + 1, ranges[i + 1].lower_bound());
        }

        let ranges: Vec<(ByteRange, T)> = ranges
            .into_iter()
            .map(|r| {
                let maps_to = map
                    .iter()
                    .filter_map(|(x, t)| if x.overlaps(r) { Some(t.clone()) } else { None })
                    .next()
                    .unwrap_or(default.clone());
                (r, maps_to)
            })
            .collect();

        Self::build_inner(&ranges)
    }
}

impl<T> Default for DecisionTree<T> {
    fn default() -> Self {
        Self {
            instrs: Default::default(),
            ret_vals: Default::default(),
        }
    }
}