about summary refs log tree commit diff stats
path: root/src/regex/bc.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/regex/bc.rs')
-rw-r--r-- src/regex/bc.rs 94
1 files changed, 43 insertions, 51 deletions
diff --git a/src/regex/bc.rs b/src/regex/bc.rs
index ea1d086..b94433e 100644
--- a/src/regex/bc.rs
+++ b/src/regex/bc.rs
@@ -1,10 +1,10 @@
-use std::collections::{HashMap, VecDeque};
+use std::collections::HashMap;
use super::{
Class, GreedyBehavior, LookDirection, LookPolarity, Match, Pattern, RegexEngine,
byte_range::ByteRange,
};
-use crate::bitset::BitSet;
+use crate::{BString, bitset::BitSet};
trait Flavor: Clone {
type CustomInstr: Copy + Clone + std::fmt::Debug;
@@ -267,6 +267,7 @@ struct VirtualMachine<'a> {
vm1: VM<'a, MainFlavor>,
vm2: LookaheadVM<'a>,
accepting: &'a BitSet,
+ submatches: &'a [BString],
}
impl<'a> VirtualMachine<'a> {
@@ -300,11 +301,17 @@ impl<'a> VirtualMachine<'a> {
.iter()
.filter(|t| self.accepting.get(t.pc as usize))
.map(|t| {
- let submatches: Vec<_> = t.data.windows(2).map(|x| Some(x[0]?..x[1]?)).collect();
-
- Match {
- submatches: submatches.into(),
- }
+ let submatches = self
+ .submatches
+ .iter()
+ .enumerate()
+ .filter_map(|(idx, name)| {
+ let begin = t.data[2 * idx]?;
+ let end = t.data[2 * idx + 1]?;
+ Some((name.clone(), begin..end))
+ })
+ .collect();
+ Match { submatches }
})
.next()
}
@@ -327,7 +334,7 @@ pub struct BytecodeCompiledRegex {
instrs1: Box<[Instr<MainFlavor>]>,
instrs2: Box<[Instr<AssertionFlavor>]>,
no_lookbehind: bool,
- submatch_count: usize,
+ submatches: Vec<BString>,
accepting: BitSet,
}
@@ -347,7 +354,7 @@ impl BytecodeCompiledRegex {
&self.instrs1,
Thread {
pc: 0,
- data: vec![None; 2 * self.submatch_count].into(),
+ data: vec![None; 2 * self.submatches.len()].into(),
},
);
let vm2 = VM::new(&self.instrs2, Thread { pc: 0, data: () });
@@ -357,6 +364,7 @@ impl BytecodeCompiledRegex {
vm1,
vm2,
accepting: &self.accepting,
+ submatches: &self.submatches,
};
if self.no_lookbehind {
for (i, ch) in data.iter().cloned().enumerate() {
@@ -392,7 +400,7 @@ struct Compiler<'a, F: Flavor> {
map: HashMap<Pattern, CompiledSnippet>,
assertion_handler: AssertionHandler<'a, F>,
assertion_fork_base: usize,
- submatch_count: usize,
+ submatches: Vec<BString>,
}
fn fork<F: Flavor>(repeat: usize, exit: usize, greedy: GreedyBehavior) -> Instr<F> {
@@ -414,7 +422,7 @@ impl<'a, F: Flavor> Compiler<'a, F> {
map: HashMap::new(),
assertion_handler: Box::new(assertion_handler),
assertion_fork_base: usize::MAX,
- submatch_count: 0,
+ submatches: Vec::new(),
}
}
@@ -523,9 +531,9 @@ impl<'a, F: Flavor> Compiler<'a, F> {
self.instrs.push(ins);
}
Pattern::Nothing => {}
- Pattern::Submatch(pat) => {
- let i = self.submatch_count as u32 * 2;
- self.submatch_count += 1;
+ Pattern::Submatch(match_name, pat) => {
+ let i = self.submatches.len() as u32 * 2;
+ self.submatches.push(match_name);
if let Some(ins) = F::save(i) {
self.instrs.push(Instr::Custom(ins));
}
@@ -605,7 +613,7 @@ impl TryFrom<Pattern> for BytecodeCompiledRegex {
fn try_from(value: Pattern) -> Result<Self, Self::Error> {
let mut neg = assertion_compiler();
let mut pos = assertion_compiler();
- let (final_state, instrs, submatch_count) = {
+ let (final_state, instrs, submatches) = {
let mut main: Compiler<MainFlavor> = Compiler::new(|dir, pol, pat| {
let target = match dir {
LookDirection::Ahead => pos.compile_and_memoize(pat.reverse()),
@@ -622,7 +630,7 @@ impl TryFrom<Pattern> for BytecodeCompiledRegex {
main.compile(value)?;
let end = main.instrs.len();
main.instrs.push(Instr::Class(Class::Nothing));
- (end, main.instrs, main.submatch_count)
+ (end, main.instrs, main.submatches)
};
neg.finalize_assertion_forks();
pos.finalize_assertion_forks();
@@ -636,7 +644,7 @@ impl TryFrom<Pattern> for BytecodeCompiledRegex {
instrs1: instrs.into(),
instrs2: pos.instrs.into(),
accepting,
- submatch_count,
+ submatches,
})
}
}
@@ -675,94 +683,78 @@ mod tests {
#[test]
fn nongreedy_star() {
- let re = regex("(ab*?)bb*");
+ let re = regex("(?<x>ab*?)bb*");
assert_eq!(
- re.re_match(b"abbb").unwrap().submatches[0].clone().unwrap(),
+ re.re_match(b"abbb").unwrap().submatches["x".as_bytes()],
0..1
);
assert_eq!(
- re.re_match(b"abbbbb").unwrap().submatches[0]
- .clone()
- .unwrap(),
+ re.re_match(b"abbbbb").unwrap().submatches["x".as_bytes()],
0..1
);
}
#[test]
fn greedy_star() {
- let re = regex("(ab*)bb*");
+ let re = regex("(?<x>ab*)bb*");
assert_eq!(
- re.re_match(b"abbb").unwrap().submatches[0].clone().unwrap(),
+ re.re_match(b"abbb").unwrap().submatches["x".as_bytes()],
0..3
);
assert_eq!(
- re.re_match(b"abbbbb").unwrap().submatches[0]
- .clone()
- .unwrap(),
+ re.re_match(b"abbbbb").unwrap().submatches["x".as_bytes()],
0..5
);
}
#[test]
fn nongreedy_plus() {
- let re = regex("(ab+?)bb*");
+ let re = regex("(?<x>ab+?)bb*");
assert_eq!(
- re.re_match(b"abbbb").unwrap().submatches[0]
- .clone()
- .unwrap(),
+ re.re_match(b"abbbb").unwrap().submatches["x".as_bytes()],
0..2
);
assert_eq!(
- re.re_match(b"abbbbb").unwrap().submatches[0]
- .clone()
- .unwrap(),
+ re.re_match(b"abbbbb").unwrap().submatches["x".as_bytes()],
0..2
);
}
#[test]
fn greedy_plus() {
- let re = regex("(ab+)bb*");
+ let re = regex("(?<x>ab+)bb*");
assert_eq!(
- re.re_match(b"abbb").unwrap().submatches[0].clone().unwrap(),
+ re.re_match(b"abbb").unwrap().submatches["x".as_bytes()],
0..3
);
assert_eq!(
- re.re_match(b"abbbbb").unwrap().submatches[0]
- .clone()
- .unwrap(),
+ re.re_match(b"abbbbb").unwrap().submatches["x".as_bytes()],
0..5
);
}
#[test]
fn nongreedy_qm() {
- let re = regex("(ab??)bb*");
+ let re = regex("(?<x>ab??)bb*");
assert_eq!(
- re.re_match(b"abbbb").unwrap().submatches[0]
- .clone()
- .unwrap(),
+ re.re_match(b"abbbb").unwrap().submatches["x".as_bytes()],
0..1
);
assert_eq!(
- re.re_match(b"abbbbb").unwrap().submatches[0]
- .clone()
- .unwrap(),
+ re.re_match(b"abbbbb").unwrap().submatches["x".as_bytes()],
0..1
);
}
#[test]
fn greedy_qm() {
- let re = regex("(ab?)bb*");
+ let re = regex("(?<x>ab?)bb*");
assert_eq!(
- re.re_match(b"abbb").unwrap().submatches[0].clone().unwrap(),
+ re.re_match(b"abbb").unwrap().submatches["x".as_bytes()],
0..2
);
assert_eq!(
- re.re_match(b"abbbbb").unwrap().submatches[0]
- .clone()
- .unwrap(),
+ re.re_match(b"abbbbb").unwrap().submatches["x".as_bytes()],
0..2
);
}