diff options
| author | Jonas Maier <> | 2026-03-17 16:37:44 +0100 |
|---|---|---|
| committer | Jonas Maier <> | 2026-03-17 16:37:44 +0100 |
| commit | fb63779507c21b5f0a73fef2dbaa10480b02b126 (patch) | |
| tree | 78eda19b71c2b0e47ac01714fd80b687a7b4b783 /src/parse | |
| parent | 890bde4d5dd1baadbcdb48ba51ecde88c1154f50 (diff) | |
| download | pish-fb63779507c21b5f0a73fef2dbaa10480b02b126.tar.gz | |
better parsing
Diffstat (limited to 'src/parse')
| -rw-r--r-- | src/parse/mod.rs | 46 | ||||
| -rw-r--r-- | src/parse/test.rs | 94 |
2 files changed, 100 insertions, 40 deletions
diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 006fce2..9d3164e 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -912,6 +912,20 @@ struct Cursor<'a> { backtrace: bool, } +#[derive(Default)] +struct SpaceStats { + space: u32, + tab: u32, + lf: u32, + cr: u32, +} + +impl SpaceStats { + fn is_empty(&self) -> bool { + self.space == 0 && self.tab == 0 && self.lf == 0 && self.cr == 0 + } +} + impl<'a> Cursor<'a> { fn new(buf: &'a [u8], mode: ParseMode) -> Self { Self { @@ -978,6 +992,31 @@ impl<'a> Cursor<'a> { } } + fn spaces_stats(&mut self) -> SpaceStats { + let mut stats = SpaceStats::default(); + while self.has() && b" \t\n\r".contains(&self.buf[0]) { + match self.buf[0] { + b' ' => stats.space += 1, + b'\t' => stats.tab += 1, + b'\n' => stats.lf += 1, + b'\r' => stats.cr += 1, + _ => unreachable!(), + } + self.adv(); + } + stats + } + + /// returns true if the next thing in the buffer is whitespace (including at least one newline) + /// + /// does not modify the buffer + fn whitespace_newline(&mut self) -> bool { + let x = self.buf; + let s = self.spaces_stats(); + self.buf = x; + s.lf > 0 + } + fn is_completion(&self) -> bool { matches!(self.mode, ParseMode::Completion) } @@ -1015,7 +1054,7 @@ impl Parse for Command<PreExpansion> { fn parse(b: &mut Cursor<'_>) -> Result<Self> { let path: ExpString = b.parse()?; let mut args = Vec::new(); - loop { + while !b.whitespace_newline() { match ExpString::parse(b) { Ok(arg) => args.push(arg), Err(ParseError::NotAString) => break, @@ -1031,7 +1070,8 @@ impl Parse for Pipes<PreExpansion> { let mut cmds: Vec<Command<PreExpansion>> = vec![b.parse()?]; loop { - b.spaces(); + let space_stats = b.spaces_stats(); + if b.is_empty() { return Ok(Pipes { cmds }); } @@ -1040,7 +1080,7 @@ impl Parse for Pipes<PreExpansion> { if c == b'|' { b.adv(); cmds.push(b.parse()?); - } else if is_symbol(c) { + } else if space_stats.lf > 0 || is_symbol(c) { return Ok(Pipes { cmds }); } else { Err(ParseError::Unknown(c))?; diff --git a/src/parse/test.rs b/src/parse/test.rs index ecd5c8e..b3d9440 100644 --- a/src/parse/test.rs +++ b/src/parse/test.rs @@ -4,23 +4,43 @@ fn parse(x: &[u8]) -> Ast<PreExpansion> { do_parse(x).unwrap() } -fn parse_test(l: Ast<PreExpansion>, r: Ast<PreExpansion>) { - if l != r { - let mut left = Vec::new(); - l.cdisplay(&mut left).unwrap(); - let mut right = Vec::new(); - r.cdisplay(&mut right).unwrap(); - let left = String::from_utf8_lossy(&left); - let right = String::from_utf8_lossy(&right); - if left != right { - panic!("parse equality error\nleft: {left}\nright: {right}") +const TIMEOUT_MS: u64 = 100; + +macro_rules! parse_test { + ($l:expr, $r:expr $(,)?) => {{ + let (tx, rx) = std::sync::mpsc::channel(); + + std::thread::spawn(move || { + #[allow(unreachable_code, unused_variables)] + let result = std::panic::catch_unwind(|| { + let l = $l; + let r = $r; + if l != r { + let mut left = Vec::new(); + l.cdisplay(&mut left).unwrap(); + let mut right = Vec::new(); + r.cdisplay(&mut right).unwrap(); + let left = String::from_utf8_lossy(&left); + let right = String::from_utf8_lossy(&right); + if left != right { + panic!("parse equality error\nleft: {left}\nright: {right}") + } + } + }); + let _ = tx.send(result); + }); + + match rx.recv_timeout(std::time::Duration::from_millis(TIMEOUT_MS)) { + Ok(Ok(())) => (), + Ok(Err(e)) => std::panic::resume_unwind(e), + Err(_) => panic!("test timed out after {TIMEOUT_MS} ms"), } - } + }}; } #[test] fn command_interp() { - parse_test( + parse_test!( parse(br#""$(echo echo)""#), pipes([cmd([str([cmdp(pipes([cmd([ estr(b"echo"), @@ -31,7 +51,7 @@ fn command_interp() { #[test] fn string_concat() { - parse_test( + parse_test!( parse(br#" foo'bar'"baz" "#), pipes([cmd([estr(b"foobarbaz")])]), ); @@ -39,17 +59,17 @@ fn string_concat() { #[test] fn simple_string() { - parse_test(parse(b"foo"), pipes([cmd([estr(b"foo")])])); + parse_test!(parse(b"foo"), pipes([cmd([estr(b"foo")])])); } #[test] fn simple_var() { - parse_test(parse(b"$foo"), pipes([cmd([str([var(b"foo")])])])); + parse_test!(parse(b"$foo"), pipes([cmd([str([var(b"foo")])])])); } #[test] fn ls_pipe_cat() { - parse_test( + parse_test!( parse(b"ls | cat"), pipes([cmd([estr(b"ls")]), cmd([estr(b"cat")])]), ); @@ -57,7 +77,7 @@ fn ls_pipe_cat() { #[test] fn ls_pipe_cat_nospace() { - parse_test( + parse_test!( parse(b"ls|cat"), pipes([cmd([estr(b"ls")]), cmd([estr(b"cat")])]), ); @@ -75,7 +95,7 @@ fn unclosed_double_quote() { #[test] fn tilde() { - parse_test( + parse_test!( parse(b"echo ~"), pipes([cmd([estr(b"echo"), str([var(b"HOME")])])]), ); @@ -83,7 +103,7 @@ fn tilde() { #[test] fn tilde2() { - parse_test( + parse_test!( parse(b"echo ~/foo/bar"), pipes([cmd([ estr(b"echo"), @@ -94,7 +114,7 @@ fn tilde2() { #[test] fn tilde3() { - parse_test( + parse_test!( parse(b"echo ~ "), pipes([cmd([estr(b"echo"), str([var(b"HOME")])])]), ); @@ -102,7 +122,7 @@ fn tilde3() { #[test] fn tilde4() { - parse_test( + parse_test!( parse(b"echo ~'x'"), pipes([cmd([estr(b"echo"), estr(b"~x")])]), ); @@ -110,7 +130,7 @@ fn tilde4() { #[test] fn tilde5() { - parse_test( + parse_test!( parse(b"echo ~$FOO"), pipes([cmd([estr(b"echo"), str([plain(b"~"), var(b"FOO")])])]), ); @@ -118,7 +138,7 @@ fn tilde5() { #[test] fn tilde6() { - parse_test( + parse_test!( parse(b"git rebase -i HEAD~10"), pipes([cmd([ estr(b"git"), @@ -131,7 +151,7 @@ fn tilde6() { #[test] fn set_variable_in_fun() { - parse_test( + parse_test!( parse(b"fun setter { set x = 1 }"), decl(estr(b"setter"), block([assign(estr(b"x"), estr(b"1"))])), ); @@ -139,7 +159,7 @@ fn set_variable_in_fun() { #[test] fn variable_with_defaults() { - parse_test( + parse_test!( parse(b"${x:-y}"), pipes([cmd([str([var_default(b"x", estr(b"y"))])])]), ); @@ -147,32 +167,32 @@ fn variable_with_defaults() { #[test] fn escape_newline() { - parse_test(parse(b"\"\\n\""), pipes([cmd([estr(b"\n")])])); + parse_test!(parse(b"\"\\n\""), pipes([cmd([estr(b"\n")])])); } #[test] fn escape_carriage_return() { - parse_test(parse(b"\"\\r\""), pipes([cmd([estr(b"\r")])])); + parse_test!(parse(b"\"\\r\""), pipes([cmd([estr(b"\r")])])); } #[test] fn escape_tab() { - parse_test(parse(b"\"\\t\""), pipes([cmd([estr(b"\t")])])); + parse_test!(parse(b"\"\\t\""), pipes([cmd([estr(b"\t")])])); } #[test] fn escape_hex_1() { - parse_test(parse(b"\\x41"), pipes([cmd([estr(b"A")])])); + parse_test!(parse(b"\\x41"), pipes([cmd([estr(b"A")])])); } #[test] fn escape_hex_2() { - parse_test(parse(b"\\x0a"), pipes([cmd([estr(b"\n")])])); + parse_test!(parse(b"\\x0a"), pipes([cmd([estr(b"\n")])])); } #[test] fn pipe_on_new_line() { - parse_test( + parse_test!( parse(b"cat file \n | cat"), pipes([cmd([estr(b"cat"), estr(b"file")]), cmd([estr(b"cat")])]), ); @@ -180,7 +200,7 @@ fn pipe_on_new_line() { #[test] fn semicolon() { - parse_test( + parse_test!( parse(b"fun f { x ; y }"), decl( estr(b"f"), @@ -191,7 +211,7 @@ fn semicolon() { #[test] fn newline_separates_commands() { - parse_test( + parse_test!( parse(b"fun f { x \n y }"), decl( estr(b"f"), @@ -202,7 +222,7 @@ fn newline_separates_commands() { #[test] fn newline_does_not_separate_pipes() { - parse_test( + parse_test!( parse(b"fun f { x \n| y }"), decl( estr(b"f"), @@ -213,15 +233,15 @@ fn newline_does_not_separate_pipes() { #[test] fn simple_if() { - parse_test(parse(b"if cond { x }"), todo!()); + parse_test!(parse(b"if cond { x }"), todo!()); } #[test] fn if_else() { - parse_test(parse(b"if cond { x } else { y }"), todo!()); + parse_test!(parse(b"if cond { x } else { y }"), todo!()); } #[test] fn simple_while() { - parse_test(parse(b"while cond { x }"), todo!()); + parse_test!(parse(b"while cond { x }"), todo!()); } |
