aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/parse/regex/bc.rs354
-rw-r--r--src/parse/regex/byte_range.rs1
-rw-r--r--src/parse/regex/enfa.rs8
-rw-r--r--src/parse/regex/mod.rs52
-rw-r--r--tests/common.rs29
5 files changed, 393 insertions, 51 deletions
diff --git a/src/parse/regex/bc.rs b/src/parse/regex/bc.rs
index eec5628..75a6d5d 100644
--- a/src/parse/regex/bc.rs
+++ b/src/parse/regex/bc.rs
@@ -1,14 +1,18 @@
-use std::collections::VecDeque;
+use std::collections::{HashMap, VecDeque};
use crate::{
bitset::BitSet,
- parse::regex::{LookDirection, LookPolarity, byte_range::ByteRange},
+ parse::regex::{
+ CharacterClass, GreedyBehavior, LookDirection, LookPolarity, Pattern, byte_range::ByteRange,
+ },
};
trait Flavor: Clone {
type CustomInstr: Copy + Clone;
type ThreadData: Clone;
- type StepData<'a, 'b> where 'b : 'a;
+ type StepData<'a, 'b>
+ where
+ 'b: 'a;
fn accepts<'a, 'b>(
thread: &mut Thread<Self>,
@@ -22,7 +26,10 @@ struct MainFlavor;
impl Flavor for MainFlavor {
type CustomInstr = MainInstr;
type ThreadData = Box<[usize]>;
- type StepData<'a, 'b> = (usize, &'a BitSet, &'a mut LookaheadVM<'b>) where 'b : 'a;
+ type StepData<'a, 'b>
+ = (usize, &'a BitSet, &'a mut LookaheadVM<'b>)
+ where
+ 'b: 'a;
fn accepts<'a, 'b>(
thread: &mut Thread<Self>,
@@ -36,7 +43,7 @@ impl Flavor for MainFlavor {
}
MainInstr::Join(assertion) => {
let should_match = assertion.pol == LookPolarity::Positive;
- let state = assertion.what as usize;
+ let state = assertion.target as usize;
let is_matching = match assertion.dir {
LookDirection::Ahead => data.2.get_state(data.0, state),
LookDirection::Behind => data.1.get(state),
@@ -55,7 +62,10 @@ struct AssertionFlavor;
impl Flavor for AssertionFlavor {
type CustomInstr = Nothing;
type ThreadData = ();
- type StepData<'a, 'b> = () where 'b : 'a;
+ type StepData<'a, 'b>
+ = ()
+ where
+ 'b: 'a;
fn accepts(_thread: &mut Thread<Self>, instr: Self::CustomInstr, _sd: &mut ()) -> bool {
match instr {}
@@ -64,17 +74,17 @@ impl Flavor for AssertionFlavor {
type JumpTarget = u32;
type Register = u32;
-type AssertionRef = u32;
#[derive(Copy, Clone)]
struct Assertion {
- what: JumpTarget,
+ target: JumpTarget,
dir: LookDirection,
pol: LookPolarity,
}
#[derive(Copy, Clone)]
enum Instr<F: Flavor> {
+ Class(CharacterClass),
Consume(ByteRange),
Jump(JumpTarget),
Fork(JumpTarget, JumpTarget),
@@ -87,19 +97,12 @@ enum MainInstr {
Join(Assertion),
}
-#[derive(Copy, Clone)]
-enum Look {}
-
-type Registers = Box<[usize]>;
-
#[derive(Clone)]
struct Thread<F: Flavor> {
pc: JumpTarget,
data: F::ThreadData,
}
-struct CachedLookahead {}
-
struct VM<'p, F: Flavor> {
instr: &'p [Instr<F>],
threads: Vec<Thread<F>>,
@@ -107,27 +110,24 @@ struct VM<'p, F: Flavor> {
}
impl<'p, F: Flavor> VM<'p, F> {
- fn new(instr: &'p [Instr<F>]) -> Self {
+ fn new(instr: &'p [Instr<F>], starting_thread: Thread<F>) -> Self {
Self {
instr,
- threads: todo!("determine starting thread"),
+ threads: vec![starting_thread],
hot: BitSet::new(instr.len()),
}
}
- fn step<'a>(&mut self, byte: u8, sd: &mut F::StepData<'a, 'p>) {
+ fn step_epsilon<'a>(&mut self, sd: &mut F::StepData<'a, 'p>) {
let mut threads: VecDeque<_> = self.threads.drain(..).collect();
self.hot.set_all(false);
while let Some(mut thread) = threads.pop_front() {
match self.instr[thread.pc as usize] {
- Instr::Consume(bytes) => {
- if bytes.contains(byte) {
- thread.pc += 1;
- if !self.hot.get(thread.pc as usize) {
- self.hot.set(thread.pc as usize, true);
- self.threads.push(thread);
- }
+ Instr::Class(_) | Instr::Consume(_) => {
+ if !self.hot.get(thread.pc as usize) {
+ self.hot.set(thread.pc as usize, true);
+ self.threads.push(thread);
}
}
Instr::Jump(j) => {
@@ -153,6 +153,32 @@ impl<'p, F: Flavor> VM<'p, F> {
}
}
}
+
+    /// Feeds one input byte to every live thread.
+    ///
+    /// A thread parked on a `Class` or `Consume` instruction that accepts `byte`
+    /// advances to the following instruction and has its new program counter
+    /// marked in `hot`; every other thread is dropped.
+    fn step_consume(&mut self, byte: u8) {
+        self.hot.set_all(false);
+        self.threads.retain_mut(|thread| {
+            let accepted = match self.instr[thread.pc as usize] {
+                Instr::Class(class) => class.matches(byte),
+                Instr::Consume(bytes) => bytes.contains(byte),
+                _ => false,
+            };
+            if accepted {
+                thread.pc += 1;
+                self.hot.set(thread.pc as usize, true);
+            }
+            accepted
+        });
+    }
}
struct LookaheadVM<'a> {
@@ -180,7 +206,8 @@ impl<'a> LookaheadVM<'a> {
assert_eq!(self.loc_offset, 0);
self.loc_offset = loc;
for i in (loc..self.data.len()).rev() {
- self.vm.step(self.data[i], &mut ());
+ self.vm.step_epsilon(&mut ());
+ self.vm.step_consume(self.data[i]);
self.cache_data.push(self.vm.hot.clone());
}
self.cache_data.reverse();
@@ -204,9 +231,11 @@ struct VirtualMachine<'a> {
impl<'a> VirtualMachine<'a> {
fn step(&mut self, byte: u8, loc: usize) {
- self.vm0.step(byte, &mut ());
+ self.vm0.step_epsilon(&mut ());
self.vm1
- .step(byte, &mut (loc, &self.vm0.hot, &mut self.vm2));
+ .step_epsilon(&mut (loc, &self.vm0.hot, &mut self.vm2));
+ self.vm0.step_consume(byte);
+ self.vm1.step_consume(byte);
}
fn extract_match(&self) -> Option<Match> {
@@ -220,19 +249,24 @@ impl<'a> VirtualMachine<'a> {
.next()
}
}
-
-pub struct ByteCodeCompiledRegex {
+pub struct BytecodeCompiledRegex {
instrs0: Box<[Instr<AssertionFlavor>]>,
instrs1: Box<[Instr<MainFlavor>]>,
instrs2: Box<[Instr<AssertionFlavor>]>,
accepting: BitSet,
}
-impl ByteCodeCompiledRegex {
+impl BytecodeCompiledRegex {
pub fn re_match(&self, data: &[u8]) -> Option<Match> {
- let vm0 = VM::new(&self.instrs0);
- let vm1 = VM::new(&self.instrs1);
- let vm2 = VM::new(&self.instrs2);
+ let vm0 = VM::new(&self.instrs0, Thread { pc: 0, data: () });
+ let vm1 = VM::new(
+ &self.instrs1,
+ Thread {
+ pc: 0,
+ data: Vec::new().into(), // TODO: submatches
+ },
+ );
+ let vm2 = VM::new(&self.instrs2, Thread { pc: 0, data: () });
let vm2 = LookaheadVM::new(vm2, data);
let mut vm = VirtualMachine {
vm0,
@@ -245,8 +279,260 @@ impl ByteCodeCompiledRegex {
}
vm.extract_match()
}
+
+ /// Returns `true` when [`Self::re_match`] yields a match for `data`.
+ pub fn matches(&self, data: &[u8]) -> bool {
+ self.re_match(data).is_some()
+ }
}
pub struct Match {
pub registers: Box<[usize]>,
}
+
+/// Boxed callback the compiler invokes for every `Pattern::Assertion` it meets.
+/// It receives the look direction, the polarity and the inner pattern, and
+/// returns the single instruction to emit in its place (or a compilation error).
+type AssertionHandler<'a, F> =
+ Box<dyn 'a + FnMut(LookDirection, LookPolarity, Pattern) -> CompileResult<Instr<F>>>;
+
+/// Instruction-index bounds of a pattern compiled by `compile_and_memoize`.
+#[derive(Copy, Clone)]
+struct CompiledSnippet {
+ /// Index of the first instruction emitted for the pattern.
+ begin: JumpTarget,
+ /// Index of the `Class(CharacterClass::Nothing)` instruction pushed right
+ /// after the pattern's code.
+ end: JumpTarget,
+}
+
+/// Translates a `Pattern` tree into a flat `Instr<F>` program.
+struct Compiler<'a, F: Flavor> {
+ /// Instructions emitted so far; jump targets are indices into this vector.
+ instrs: Vec<Instr<F>>,
+ /// Snippets already produced by `compile_and_memoize`, keyed by their pattern.
+ map: HashMap<Pattern, CompiledSnippet>,
+ /// Callback that turns a `Pattern::Assertion` into a single instruction.
+ assertion_handler: AssertionHandler<'a, F>,
+ /// Index of the placeholder instruction that `finalize_assertion_forks`
+ /// overwrites; `usize::MAX` until `assertion_compiler` sets it.
+ assertion_fork_base: usize,
+}
+
+/// Builds the `Fork` instruction that closes or guards a repetition.
+///
+/// `repeat` is the target that runs the repeated body (again) and `exit` is the
+/// target that leaves the repetition. A greedy repetition lists `repeat` first,
+/// a non-greedy one lists `exit` first.
+fn fork<F: Flavor>(repeat: usize, exit: usize, greedy: GreedyBehavior) -> Instr<F> {
+    let (first, second) = match greedy {
+        GreedyBehavior::Greedy => (repeat, exit),
+        GreedyBehavior::NonGreedy => (exit, repeat),
+    };
+    Instr::Fork(first as JumpTarget, second as JumpTarget)
+}
+
+impl<'a, F: Flavor> Compiler<'a, F> {
+    /// Creates an empty compiler that delegates `Pattern::Assertion` nodes to
+    /// `assertion_handler`.
+    ///
+    /// `assertion_fork_base` starts out as `usize::MAX`, i.e. no placeholder
+    /// instruction has been reserved yet.
+    fn new(
+        assertion_handler: impl 'a
+        + FnMut(LookDirection, LookPolarity, Pattern) -> CompileResult<Instr<F>>,
+    ) -> Self {
+        let assertion_handler: AssertionHandler<'a, F> = Box::new(assertion_handler);
+        Self {
+            assertion_handler,
+            assertion_fork_base: usize::MAX,
+            instrs: Vec::new(),
+            map: HashMap::new(),
+        }
+    }
+
+    /// Emits the code for one or more repetitions of `pat`.
+    ///
+    /// The body is followed by a fork whose repeat target is the first
+    /// instruction of the body and whose exit target is the instruction right
+    /// after the fork.
+    fn rep_1_or_more(&mut self, pat: Pattern, greedy: GreedyBehavior) -> CompileResult {
+        let body_start = self.instrs.len();
+        self.compile(pat)?;
+        let fork_at = self.instrs.len();
+        self.instrs.push(fork(body_start, fork_at + 1, greedy));
+        Ok(())
+    }
+
+    /// Emits the code for an optional `pat`.
+    ///
+    /// A placeholder is reserved in front of the body and, once the body's
+    /// length is known, replaced by a fork that either enters the body or skips
+    /// to the instruction after it.
+    fn rep_0_or_1(&mut self, pat: Pattern, greedy: GreedyBehavior) -> CompileResult {
+        let guard_at = self.instrs.len();
+        self.instrs.push(Instr::Jump(u32::MAX));
+        self.compile(pat)?;
+        let skip_to = self.instrs.len();
+        self.instrs[guard_at] = fork(guard_at + 1, skip_to, greedy);
+        Ok(())
+    }
+
+    /// Emits the code for zero or more repetitions of `pat`.
+    ///
+    /// Layout of the emitted program:
+    ///
+    /// ```text
+    /// base:      Jump(fork_pos)
+    /// base + 1:  <code for pat>
+    /// fork_pos:  fork(base + 1, after)
+    /// after:     (whatever the caller emits next)
+    /// ```
+    ///
+    /// Control enters at `base` and goes straight to the fork, so zero
+    /// iterations are possible. The fork's repeat branch must target
+    /// `base + 1`, the first instruction of the body: targeting `base` would
+    /// only re-run the entry jump and land on the fork again, so the body could
+    /// never execute.
+    fn rep_any_amt(&mut self, pat: Pattern, greedy: GreedyBehavior) -> CompileResult {
+        let base = self.instrs.len();
+        // Placeholder for the entry jump; patched once the fork position is known.
+        self.instrs.push(Instr::Jump(u32::MAX));
+        let body_start = base + 1;
+        self.compile(pat)?;
+        let fork_pos = self.instrs.len();
+        let after = fork_pos + 1;
+        self.instrs.push(fork(body_start, after, greedy));
+        self.instrs[base] = Instr::Jump(fork_pos as JumpTarget);
+        Ok(())
+    }
+
+ /// Appends the instructions for `pat` to `self.instrs`.
+ ///
+ /// Byte, range and class patterns become a single `Consume`/`Class`
+ /// instruction, composite patterns recurse, and assertions are delegated to
+ /// `self.assertion_handler`. The only errors returned are those propagated
+ /// from the handler or from compiling a sub-pattern.
+ ///
+ /// Panics if an `Alt` has no alternatives.
+ fn compile(&mut self, pat: Pattern) -> CompileResult {
+ match pat {
+ Pattern::Byte(x) => self.instrs.push(Instr::Consume(ByteRange::new_single(x))),
+ Pattern::Range(a, b) => self.instrs.push(Instr::Consume(ByteRange::new_range(a, b))),
+ Pattern::CharacterClass(cc) => {
+ self.instrs.push(Instr::Class(cc));
+ }
+ Pattern::Alt(patterns) => {
+ let branch_factor = patterns.len();
+ assert!(branch_factor > 0);
+
+ let base = self.instrs.len();
+
+ // Reserve `branch_factor - 1` slots at `base..`; they are overwritten
+ // with forks once the entry point of every alternative is known.
+ for _ in 0..patterns.len() - 1 {
+ self.instrs.push(Instr::Jump(u32::MAX));
+ }
+
+ // enter_pats[i]: first instruction of alternative i.
+ // leave_pats[i]: slot directly after alternative i's code.
+ let mut enter_pats = Vec::new();
+ let mut leave_pats = Vec::new();
+ for pat in patterns.into_iter() {
+ enter_pats.push(self.instrs.len());
+ self.compile(pat)?;
+ leave_pats.push(self.instrs.len());
+
+ // Placeholder that later becomes the jump to the join point.
+ self.instrs.push(Instr::Jump(u32::MAX));
+ }
+
+ // The last alternative falls through to the join point, so its
+ // trailing placeholder is not needed.
+ self.instrs.pop(); // remove last jump
+ let join_point = self.instrs.len();
+
+ // Link forks: slot `base + i` forks into alternative `i` and into the
+ // next slot; the last slot forks directly into the final alternative.
+ for i in 0..branch_factor - 1 {
+ let a = enter_pats[i];
+ let b = if i == branch_factor - 2 {
+ enter_pats[i + 1]
+ } else {
+ base + i + 1
+ };
+ self.instrs[base + i] = Instr::Fork(a as JumpTarget, b as JumpTarget);
+ }
+
+ // Link joins: every alternative but the last jumps to the join point.
+ for i in 0..branch_factor - 1 {
+ self.instrs[leave_pats[i]] = Instr::Jump(join_point as JumpTarget);
+ }
+ }
+ Pattern::Concat(patterns) => {
+ for pat in patterns.into_iter() {
+ self.compile(pat)?;
+ }
+ }
+ Pattern::Rep(pat, 0, None, greed) => {
+ self.rep_any_amt(*pat, greed)?;
+ }
+ // `min >= 1` here because a minimum of 0 is taken by the arm above:
+ // emit `min - 1` plain copies, then a one-or-more loop for the last copy.
+ Pattern::Rep(pat, min, None, greed) => {
+ let pat = *pat;
+ for _ in 1..min {
+ self.compile(pat.clone())?;
+ }
+ self.rep_1_or_more(pat, greed)?;
+ }
+ // Bounded repetition: `min` mandatory copies followed by `max - min`
+ // optional copies.
+ Pattern::Rep(pat, min, Some(max), greed) => {
+ let pat = *pat;
+ // NOTE(review): `max - min` underflows when `max < min`; presumably
+ // the parser rejects such ranges before they get here - confirm.
+ let opt = max - min;
+ for _ in 0..min {
+ self.compile(pat.clone())?;
+ }
+ for _ in 0..opt {
+ self.rep_0_or_1(pat.clone(), greed)?;
+ }
+ }
+ Pattern::Assertion(look_direction, look_polarity, pattern) => {
+ // The handler decides which single instruction stands in for the
+ // assertion in this program.
+ let ins = (self.assertion_handler)(look_direction, look_polarity, *pattern)?;
+ self.instrs.push(ins);
+ }
+ Pattern::Nothing => {}
+ }
+ Ok(())
+ }
+
+    /// Compiles `pat` once and remembers where its code lives.
+    ///
+    /// If `pat` was compiled before, the recorded bounds are returned and nothing
+    /// is emitted. Otherwise the pattern's code is appended, followed by a
+    /// `Class(CharacterClass::Nothing)` instruction whose index becomes the
+    /// snippet's `end`.
+    fn compile_and_memoize(&mut self, pat: Pattern) -> CompileResult<CompiledSnippet> {
+        if let Some(known) = self.map.get(&pat).copied() {
+            return Ok(known);
+        }
+
+        let begin = self.instrs.len() as JumpTarget;
+        self.compile(pat.clone())?;
+        let end = self.instrs.len() as JumpTarget;
+        self.instrs.push(Instr::Class(CharacterClass::Nothing));
+
+        let snippet = CompiledSnippet { begin, end };
+        self.map.insert(pat, snippet);
+        Ok(snippet)
+    }
+
+    /// Replaces the placeholder at `assertion_fork_base` with an entry point that
+    /// reaches the `begin` of every memoized snippet.
+    ///
+    /// With no snippets the placeholder becomes `Class(CharacterClass::Nothing)`,
+    /// with one a plain jump, with two a single fork. With three or more, the
+    /// placeholder forks into the first snippet and into a chain of forks
+    /// appended at the end of the program, one per remaining snippet except the
+    /// last, which shares the final fork.
+    fn finalize_assertion_forks(&mut self) {
+        let targets: Vec<JumpTarget> = self.map.values().map(|snippet| snippet.begin).collect();
+        let chain_start = self.instrs.len() as JumpTarget;
+
+        let entry = match targets.as_slice() {
+            [] => Instr::Class(CharacterClass::Nothing),
+            [only] => Instr::Jump(*only),
+            [first, second] => Instr::Fork(*first, *second),
+            [first, ..] => Instr::Fork(*first, chain_start),
+        };
+        self.instrs[self.assertion_fork_base] = entry;
+
+        if targets.len() > 2 {
+            let last = targets.len() - 1;
+            for i in 1..last {
+                // Every chain link falls through to the next link, except the
+                // final one, which goes straight to the last snippet.
+                let otherwise = if i + 1 == last {
+                    targets[last]
+                } else {
+                    self.instrs.len() as JumpTarget + 1
+                };
+                self.instrs.push(Instr::Fork(targets[i], otherwise));
+            }
+        }
+    }
+}
+
+/// Builds the compiler used for look-around programs.
+///
+/// The program starts with a non-greedy repetition of
+/// `CharacterClass::Everything`, followed by a placeholder jump whose index is
+/// stored in `assertion_fork_base`; `finalize_assertion_forks` overwrites that
+/// placeholder once all assertions are known. Assertions nested inside an
+/// assertion are rejected with `NestedLookaroundNotSupported`.
+fn assertion_compiler() -> Compiler<'static, AssertionFlavor> {
+    let mut compiler =
+        Compiler::new(|_, _, _| Err(RegexCompilationError::NestedLookaroundNotSupported));
+    let any_prefix = Pattern::CharacterClass(CharacterClass::Everything);
+    compiler
+        .rep_any_amt(any_prefix, GreedyBehavior::NonGreedy)
+        .expect("characterclass should always compile");
+    let placeholder_at = compiler.instrs.len();
+    compiler.instrs.push(Instr::Jump(u32::MAX));
+    compiler.assertion_fork_base = placeholder_at;
+    compiler
+}
+
+/// Reasons a `Pattern` cannot be compiled to bytecode.
+#[derive(Clone, Debug)]
+pub enum RegexCompilationError {
+ /// A look-around assertion occurred inside another look-around assertion.
+ NestedLookaroundNotSupported,
+}
+
+/// `Result` alias for bytecode compilation; `T` defaults to `()`.
+pub type CompileResult<T = ()> = Result<T, RegexCompilationError>;
+
+impl TryFrom<Pattern> for BytecodeCompiledRegex {
+    type Error = RegexCompilationError;
+
+    /// Compiles `value` into the three programs of a `BytecodeCompiledRegex`.
+    ///
+    /// Look-behind patterns are compiled as written into `instrs0`; look-ahead
+    /// patterns are reversed first and compiled into `instrs2`. In the main
+    /// program (`instrs1`) each assertion becomes a `Join` on the `end` index of
+    /// its snippet. `accepting` holds exactly the index of the
+    /// `Class(CharacterClass::Nothing)` instruction appended after the main
+    /// pattern.
+    fn try_from(value: Pattern) -> Result<Self, Self::Error> {
+        let mut lookbehind = assertion_compiler();
+        let mut lookahead = assertion_compiler();
+
+        // The main compiler borrows both assertion compilers through its
+        // handler, so it lives in its own scope.
+        let (accept_at, main_instrs) = {
+            let mut main: Compiler<MainFlavor> = Compiler::new(|dir, pol, pat| {
+                let snippet = match dir {
+                    LookDirection::Ahead => lookahead.compile_and_memoize(pat.reverse())?,
+                    LookDirection::Behind => lookbehind.compile_and_memoize(pat)?,
+                };
+                let assertion = Assertion {
+                    target: snippet.end,
+                    dir,
+                    pol,
+                };
+                Ok(Instr::Custom(MainInstr::Join(assertion)))
+            });
+            main.compile(value)?;
+            let accept_at = main.instrs.len();
+            main.instrs.push(Instr::Class(CharacterClass::Nothing));
+            (accept_at, main.instrs)
+        };
+
+        lookbehind.finalize_assertion_forks();
+        lookahead.finalize_assertion_forks();
+
+        let mut accepting = BitSet::new(main_instrs.len());
+        accepting.set(accept_at, true);
+
+        Ok(Self {
+            instrs0: lookbehind.instrs.into(),
+            instrs1: main_instrs.into(),
+            instrs2: lookahead.instrs.into(),
+            accepting,
+        })
+    }
+}
diff --git a/src/parse/regex/byte_range.rs b/src/parse/regex/byte_range.rs
index 0151f0c..b7642c1 100644
--- a/src/parse/regex/byte_range.rs
+++ b/src/parse/regex/byte_range.rs
@@ -20,7 +20,6 @@ impl ByteRange {
Self { from, to }
}
- #[cfg(test)]
pub fn new_single(c: u8) -> Self {
Self::new_range(c, c)
}
diff --git a/src/parse/regex/enfa.rs b/src/parse/regex/enfa.rs
index 272c709..dd3839f 100644
--- a/src/parse/regex/enfa.rs
+++ b/src/parse/regex/enfa.rs
@@ -637,6 +637,7 @@ impl EState {
#[derive(Debug)]
pub enum EnfaTranslationError {
+ CharacterClassNotSupported,
AssertionsNotSupported,
}
@@ -656,6 +657,9 @@ impl TryFrom<Pattern> for ENFA {
EState::terminal(),
],
},
+ Pattern::CharacterClass(_) => {
+ return Err(EnfaTranslationError::CharacterClassNotSupported);
+ }
Pattern::Alt(alts) => {
let nfas: Vec<ENFA> = alts
.into_iter()
@@ -683,13 +687,13 @@ impl TryFrom<Pattern> for ENFA {
.collect::<Result<_, _>>()?;
Self::concat(nfas)
}
- Pattern::Rep(regex, min, None) => {
+ Pattern::Rep(regex, min, None, _) => {
let nfa = ENFA::try_from(*regex)?;
let base = nfa.clone().repeat(min as usize);
let tail = nfa.looping();
Self::concat(vec![base, tail])
}
- Pattern::Rep(regex, min, Some(max)) => {
+ Pattern::Rep(regex, min, Some(max), _) => {
assert!(min < max);
let nfa = Self::try_from(*regex)?;
let base = nfa.clone().repeat(min as usize);
diff --git a/src/parse/regex/mod.rs b/src/parse/regex/mod.rs
index 1bcf18c..79e234f 100644
--- a/src/parse/regex/mod.rs
+++ b/src/parse/regex/mod.rs
@@ -2,12 +2,12 @@ use crate::parse::{NotImplementedKind, OtherHighlights};
use super::{Parse, ParseError, Result};
+pub mod bc;
mod byte_range;
pub mod dfa;
pub mod enfa;
-pub mod bc;
-#[derive(PartialEq, Eq, Debug, Clone, Copy)]
+#[derive(PartialEq, Eq, Debug, Clone, Copy, Hash)]
pub enum LookDirection {
Ahead,
Behind,
@@ -28,13 +28,35 @@ pub enum LookPolarity {
Negative,
}
-#[derive(PartialEq, Debug, Clone)]
+/// Predefined sets of bytes that a pattern can match with a single step.
+#[derive(PartialEq, Eq, Debug, Clone, Copy, Hash)]
+pub enum CharacterClass {
+ /// Matches every byte.
+ Everything,
+ /// Matches no byte.
+ Nothing,
+ /// Matches ASCII whitespace.
+ Whitespace,
+ /// Matches ASCII letters.
+ Alphabetic,
+ /// Matches ASCII letters and digits.
+ Alphanumeric,
+}
+
+impl CharacterClass {
+    /// Reports whether `byte` belongs to this class.
+    ///
+    /// The named classes are ASCII-only: they use the `u8::is_ascii_*`
+    /// predicates.
+    pub fn matches(self, byte: u8) -> bool {
+        match self {
+            Self::Everything => true,
+            Self::Nothing => false,
+            Self::Whitespace => byte.is_ascii_whitespace(),
+            Self::Alphabetic => byte.is_ascii_alphabetic(),
+            Self::Alphanumeric => byte.is_ascii_alphanumeric(),
+        }
+    }
+}
+
+#[derive(PartialEq, Eq, Hash, Debug, Clone)]
pub enum Pattern {
Byte(u8),
Range(u8, u8),
+ CharacterClass(CharacterClass),
Alt(Vec<Pattern>),
Concat(Vec<Pattern>),
- Rep(Box<Pattern>, u32, Option<u32>),
+ Rep(Box<Pattern>, u32, Option<u32>, GreedyBehavior),
Assertion(LookDirection, LookPolarity, Box<Pattern>),
Nothing,
}
@@ -106,16 +128,17 @@ impl Pattern {
match self {
Pattern::Byte(_) => ByteConsumption::one(),
Pattern::Range(_, _) => ByteConsumption::one(),
+ Pattern::CharacterClass(_) => ByteConsumption::one(),
Pattern::Alt(patterns) => patterns
.iter()
.map(Self::max_byte_consumption)
.max()
.unwrap_or(ByteConsumption::zero()),
Pattern::Concat(patterns) => patterns.iter().map(Self::max_byte_consumption).sum(),
- Pattern::Rep(pattern, _, Some(max_reps)) => {
+ Pattern::Rep(pattern, _, Some(max_reps), _) => {
pattern.max_byte_consumption() * (*max_reps as usize)
}
- Pattern::Rep(_, _, None) => ByteConsumption::Unbounded,
+ Pattern::Rep(_, _, None, _) => ByteConsumption::Unbounded,
Pattern::Assertion(_, _, _) => ByteConsumption::zero(),
Pattern::Nothing => ByteConsumption::zero(),
}
@@ -124,10 +147,10 @@ impl Pattern {
pub fn reverse(self) -> Self {
use Pattern::*;
match self {
- Byte(_) | Nothing | Range(..) => self,
+ Byte(_) | Nothing | Range(..) | CharacterClass(_) => self,
Alt(patterns) => Alt(patterns.into_iter().map(Self::reverse).collect()),
Concat(patterns) => Concat(patterns.into_iter().map(Self::reverse).rev().collect()),
- Rep(pattern, min, max) => Rep(Box::new(pattern.reverse()), min, max),
+ Rep(pattern, min, max, greedy) => Rep(Box::new(pattern.reverse()), min, max, greedy),
Assertion(dir, pol, pat) => Assertion(dir.reverse(), pol, Box::new(pat.reverse())),
}
}
@@ -217,12 +240,23 @@ fn parse_rep(s: &mut super::Cursor<'_>) -> Result<Pattern> {
));
}
- Ok(Pattern::Rep(Box::new(atom), min_rep, max_rep))
+ Ok(Pattern::Rep(
+ Box::new(atom),
+ min_rep,
+ max_rep,
+ GreedyBehavior::Greedy,
+ ))
} else {
Ok(atom)
}
}
+/// Whether a repetition is greedy. The bytecode compiler uses it to choose the
+/// operand order of the fork instruction it emits for the repetition.
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
+pub enum GreedyBehavior {
+ /// The fork lists the "repeat" target before the "exit" target.
+ Greedy,
+ /// The fork lists the "exit" target before the "repeat" target.
+ NonGreedy,
+}
+
const SYMBOLS: &[u8] = b"{}[]()*+-?|.\\ ";
fn is_symbol(x: u8) -> bool {
SYMBOLS.contains(&x)
diff --git a/tests/common.rs b/tests/common.rs
index acf562e..ba4b292 100644
--- a/tests/common.rs
+++ b/tests/common.rs
@@ -2,7 +2,7 @@
use pish::parse::{
Parse,
- regex::{CompiledPattern, Pattern},
+ regex::{CompiledPattern, Pattern, bc::BytecodeCompiledRegex},
};
pub fn test_case(_name: &str, script: &[u8], expected_output: &[u8]) {
@@ -30,9 +30,28 @@ pub fn test_case(_name: &str, script: &[u8], expected_output: &[u8]) {
);
}
-pub fn regex(pat: &str) -> CompiledPattern {
- Pattern::parse_from_bytes(pat.as_bytes())
- .expect(&format!("pattern {pat} does not parse"))
+/// Test helper holding one pattern compiled by both engines so that their
+/// results can be compared.
+pub struct MultiTestCompiledPattern {
+ /// Result of `Pattern::try_compile`.
+ dfa: CompiledPattern,
+ /// Result of `BytecodeCompiledRegex::try_from`.
+ vm: BytecodeCompiledRegex,
+}
+
+impl MultiTestCompiledPattern {
+    /// Runs both engines on `string`, asserts that they agree, and returns the
+    /// shared verdict.
+    pub fn matches(&self, string: impl Clone + AsRef<[u8]>) -> bool {
+        let expected = self.dfa.matches(string.clone());
+        let actual = self.vm.matches(string.as_ref());
+        assert_eq!(expected, actual);
+        expected
+    }
+}
+
+/// Parses `pat` and compiles it with both the DFA and the bytecode engine.
+///
+/// Panics with a message naming the pattern and the failing stage if parsing or
+/// either compilation fails. The messages are built lazily, so nothing is
+/// formatted on the success path.
+pub fn regex(pat: &str) -> MultiTestCompiledPattern {
+    let parsed = Pattern::parse_from_bytes(pat.as_bytes())
+        .unwrap_or_else(|err| panic!("pattern {pat} does not parse: {err:?}"));
+    let dfa = parsed
+        .clone()
         .try_compile()
-        .expect(&format!("pattern {pat} does not compile"))
+        .unwrap_or_else(|err| panic!("pattern {pat} does not compile to DFA: {err:?}"));
+    // `parsed` is not needed afterwards, so it is moved instead of cloned.
+    let vm = BytecodeCompiledRegex::try_from(parsed)
+        .unwrap_or_else(|err| panic!("pattern {pat} does not compile to VM: {err:?}"));
+    MultiTestCompiledPattern { dfa, vm }
 }