From 6d5d57d9dd4a558b8e1d6501f6e4ffc0f340c283 Mon Sep 17 00:00:00 2001 From: Jonas Maier Date: Wed, 18 Mar 2026 12:00:02 +0100 Subject: new string delimiter parsing --- src/parse/mod.rs | 134 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 0322c39..172974c 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -522,6 +522,140 @@ impl Parse for VarName { } } +#[derive(Clone)] +enum StringDelimiter { + /// no delimiter, i.e. when parsing a simple command like `echo foo` + None, + + /// double quotes, allows interpolation, `echo "foo $var"` + Interp, + + /// single quotes, does not allow interpolation `echo 'foo $vardoesnotexpand'` + Strict, + + /// triple quotes with custom prefix/suffix ``` + /// echo DELIM""" + /// basically + /// a + /// heredoc + /// with + /// $variables + /// """DELIM + /// ``` + InterpCustom(BString), + + /// triple single quotes with custom prefix/suffix ``` + /// echo FOO''' + /// basically + /// a + /// heredoc + /// without + /// variables + /// '''FOO + /// ``` + StrictCustom(BString), +} + +/// gets the largest ident this slice starts with, might be empty +fn peek_ident(b: &[u8]) -> &[u8] { + if b.is_empty() || !b[0].is_ascii_alphabetic() { + return &[]; + } + + let mut out = &b[..1]; + + for i in 1..b.len() { + if b[i].is_ascii_alphanumeric() { + out = &b[..=i]; + } else { + break; + } + } + + out +} + +impl StringDelimiter { + fn try_begin(b: &mut Cursor<'_>) -> Option { + if !b.has() { + return None; + } + + let ident = peek_ident(&b.buf); + if b.buf[ident.len()..].starts_with(b"\"\"\"") { + b.advance(ident.len() + 3); + return Some(Self::InterpCustom(ident.to_vec())); + } + if b.buf[ident.len()..].starts_with(b"'''") { + b.advance(ident.len() + 3); + return Some(Self::StrictCustom(ident.to_vec())); + } + + // at this point we know it's not a custom identifier with triple quotes + + let x = b.peek(); + + if !x.is_ascii_whitespace() && (!is_symbol(x) || x == b'$') { + return Some(Self::None); + } + + if x == b'"' { + b.adv(); + return Some(Self::Interp); + } + + if x == b'\'' { + b.adv(); + return Some(Self::Strict); + } + + None + } + + /// if the current string ends right at the cursor, consumes the string closing tokens and returns true + /// otherwise, consumes no tokens and returns false + fn try_end(&self, b: &mut Cursor<'_>) -> bool { + if !b.has() { + return false; + } + + let x = b.peek(); + let buf = &mut b.buf; + + match self { + StringDelimiter::None if x.is_ascii_whitespace() || is_symbol(x) && x != b'$' => { + b.adv(); + true + } + StringDelimiter::Interp if x == b'"' => { + b.adv(); + true + } + StringDelimiter::Strict if x == b'\'' => { + b.adv(); + true + } + StringDelimiter::InterpCustom(delim) + if buf.len() >= 3 && &buf[..3] == b"\"\"\"" && buf[3..].starts_with(&delim) => + { + b.advance(3 + delim.len()); + true + } + StringDelimiter::StrictCustom(delim) + if buf.len() >= 3 && &buf[..3] == b"'''" && buf[3..].starts_with(&delim) => + { + b.advance(3 + delim.len()); + true + } + _ => false, + } + } + + fn is_strict(&self) -> bool { + matches!(self, Self::Strict | Self::StrictCustom(_)) + } +} + impl Parse for ExpString { fn parse(b: &mut Cursor<'_>) -> Result { b.spaces(); -- cgit v1.2.3