From 4eeb79c190bd08e761d097cd10382f09a9ee4348 Mon Sep 17 00:00:00 2001
From: Jonas Maier
Date: Fri, 5 Jun 2026 22:37:48 +0200
Subject: regex: greedy/non-greedy tests

---
 src/parse/regex/bc.rs | 116 +++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 101 insertions(+), 15 deletions(-)

(limited to 'src')

diff --git a/src/parse/regex/bc.rs b/src/parse/regex/bc.rs
index 571386e..72ba21b 100644
--- a/src/parse/regex/bc.rs
+++ b/src/parse/regex/bc.rs
@@ -139,7 +139,7 @@ impl<'p, F: Flavor> VM<'p, F> {
                 let bit = t.pc as usize;
                 if !warm.get(bit) {
                     warm.set(bit, true);
-                    threads.push_back(t);
+                    threads.push_front(t);
                 }
             }};
         }
@@ -158,12 +158,12 @@ impl<'p, F: Flavor> VM<'p, F> {
                 }
                 Instr::Fork(a, b) => {
                     add_thread!(Thread {
-                        pc: a,
+                        pc: b,
                         data: thread.data.clone(),
                     });
                     add_thread!(Thread {
-                        pc: b,
-                        data: thread.data,
+                        pc: a,
+                        data: thread.data.clone(),
                     });
                 }
                 Instr::Custom(instr) => {
@@ -612,15 +612,101 @@ impl TryFrom<Pattern> for BytecodeCompiledRegex {
 }
 
 #[cfg(test)]
-use crate::parse::Parse;
-
-#[test]
-fn print_compiled_vm() {
-    let pat = Pattern::parse_from_bytes(b"a?b?").unwrap();
-    let compiled = BytecodeCompiledRegex::try_from(pat).unwrap();
-    println!("{compiled:#?}");
-    assert_eq!(compiled.matches(b"ab"), true);
-    assert_eq!(compiled.matches(b"a"), true);
-    assert_eq!(compiled.matches(b"b"), true);
-    assert_eq!(compiled.matches(b""), true);
+mod tests {
+    use super::*;
+    use crate::parse::Parse;
+
+    fn regex(s: &str) -> BytecodeCompiledRegex {
+        let pat = Pattern::parse_from_bytes(s.as_bytes()).unwrap();
+        let compiled = BytecodeCompiledRegex::try_from(pat).unwrap();
+        compiled
+    }
+
+    #[test]
+    fn print_compiled_vm() {
+        let compiled = regex("a?b?");
+        println!("{compiled:#?}");
+        assert_eq!(compiled.matches(b"ab"), true);
+        assert_eq!(compiled.matches(b"a"), true);
+        assert_eq!(compiled.matches(b"b"), true);
+        assert_eq!(compiled.matches(b""), true);
+    }
+
+    #[test]
+    fn nongreedy_star() {
+        let re = regex("(ab*?)bb*");
+        assert_eq!(
+            re.re_match(b"abbb").unwrap().submatches[0].clone().unwrap(),
+            0..1
+        );
+        assert_eq!(
+            re.re_match(b"abbbbb").unwrap().submatches[0].clone().unwrap(),
+            0..1
+        );
+    }
+
+    #[test]
+    fn greedy_star() {
+        let re = regex("(ab*)bb*");
+        assert_eq!(
+            re.re_match(b"abbb").unwrap().submatches[0].clone().unwrap(),
+            0..3
+        );
+        assert_eq!(
+            re.re_match(b"abbbbb").unwrap().submatches[0].clone().unwrap(),
+            0..5
+        );
+    }
+
+    #[test]
+    fn nongreedy_plus() {
+        let re = regex("(ab+?)bb*");
+        assert_eq!(
+            re.re_match(b"abbbb").unwrap().submatches[0].clone().unwrap(),
+            0..2
+        );
+        assert_eq!(
+            re.re_match(b"abbbbb").unwrap().submatches[0].clone().unwrap(),
+            0..2
+        );
+    }
+
+    #[test]
+    fn greedy_plus() {
+        let re = regex("(ab+)bb*");
+        assert_eq!(
+            re.re_match(b"abbb").unwrap().submatches[0].clone().unwrap(),
+            0..3
+        );
+        assert_eq!(
+            re.re_match(b"abbbbb").unwrap().submatches[0].clone().unwrap(),
+            0..5
+        );
+    }
+
+    #[test]
+    fn nongreedy_qm() {
+        let re = regex("(ab??)bb*");
+        assert_eq!(
+            re.re_match(b"abbbb").unwrap().submatches[0].clone().unwrap(),
+            0..1
+        );
+        assert_eq!(
+            re.re_match(b"abbbbb").unwrap().submatches[0].clone().unwrap(),
+            0..1
+        );
+    }
+
+    #[test]
+    fn greedy_qm() {
+        let re = regex("(ab?)bb*");
+        assert_eq!(
+            re.re_match(b"abbb").unwrap().submatches[0].clone().unwrap(),
+            0..2
+        );
+        assert_eq!(
+            re.re_match(b"abbbbb").unwrap().submatches[0].clone().unwrap(),
+            0..2
+        );
+    }
 }
-- 
cgit v1.2.3