From c36bf58bd0d3d8d2b89211c0bfccab68dad53d66 Mon Sep 17 00:00:00 2001 From: Jonas Maier <> Date: Sat, 7 Mar 2026 11:35:38 +0100 Subject: finish parsing stuff, add first parsing test --- src/parse.rs | 769 ----------------------------------------------------- src/parse/mod.rs | 777 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/parse/test.rs | 16 ++ 3 files changed, 793 insertions(+), 769 deletions(-) delete mode 100644 src/parse.rs create mode 100644 src/parse/mod.rs create mode 100644 src/parse/test.rs diff --git a/src/parse.rs b/src/parse.rs deleted file mode 100644 index 61d268d..0000000 --- a/src/parse.rs +++ /dev/null @@ -1,769 +0,0 @@ -use crate::BString; - -pub trait Stage { - type Str: std::fmt::Debug + Clone; -} - -pub trait CmdDisplay { - fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()>; -} - -#[derive(Debug, Clone)] -pub struct PreExpansion; -#[derive(Debug, Clone)] -pub struct PostExpansion; - -impl Stage for PreExpansion { - type Str = ExpString; -} - -impl Stage for PostExpansion { - type Str = BString; -} - -type Res = std::result::Result; - -pub trait Expander { - type Error; - fn expand_var(&mut self, v: BString) -> Res; - fn expand_cmd(&mut self, c: Ast) -> Res; -} - -#[derive(Debug, Clone)] -pub enum Ast { - FunDecl(FunDecl), - VarAssign(VarAssign), - Pipes(Pipes), -} - -pub fn decl(name: ExpString, body: Ast) -> Ast { - Ast::FunDecl(FunDecl { - name: name, - body: FunBody { - body: Box::new(body), - }, - }) -} - -pub fn assign(var: ExpString, val: ExpString) -> Ast { - Ast::VarAssign(VarAssign { var, val }) -} - -pub fn pipes(cmds: [Command; N]) -> Ast { - Ast::Pipes(Pipes { - cmds: cmds.to_vec(), - }) -} - -pub fn estr(x: &[u8]) -> ExpString { - ExpString { - parts: vec![StringPart::Boring(x.to_vec())], - } -} - -pub fn str(parts: [StringPart; N]) -> ExpString { - ExpString { - parts: parts.to_vec(), - } -} - -pub fn plain(x: &[u8]) -> StringPart { - StringPart::Boring(x.to_vec()) -} - -pub fn var(x: &[u8]) -> StringPart { - StringPart::Var(VarName { name: x.to_vec() }) -} - -pub fn cmdp(x: Ast) -> StringPart { - StringPart::Cmd(x) -} - -pub fn cmd(x: [ExpString; N]) -> Command { - Command { - cmd: x[0].clone(), - args: x[1..].to_vec(), - } -} - -impl CmdDisplay for Ast { - fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> { - match self { - Ast::FunDecl(fun_decl) => { - write!(w, "decl(")?; - fun_decl.name.cdisplay(w)?; - write!(w, ", ")?; - fun_decl.body.body.cdisplay(w)?; - write!(w, ")")?; - } - Ast::VarAssign(var_assign) => { - write!(w, "assign(")?; - var_assign.var.cdisplay(w)?; - write!(w, ", ")?; - var_assign.val.cdisplay(w)?; - write!(w, ")")?; - } - Ast::Pipes(pipes) => { - write!(w, "pipes([")?; - for cmd in pipes.cmds.iter() { - cmd.cdisplay(w)?; - write!(w, ",")?; - } - write!(w, "])")?; - } - } - Ok(()) - } -} - -impl CmdDisplay for ExpString { - fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> { - if self.parts.len() == 1 && self.parts[0].is_boring() { - write!( - w, - "estr({})", - self.parts[0].clone().unwrap_boring().escape_ascii() - ) - } else { - write!(w, "str([")?; - for part in self.parts.iter() { - part.cdisplay(w)?; - } - write!(w, "])") - } - } -} - -impl CmdDisplay for StringPart { - fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> { - match self { - StringPart::Boring(items) => { - write!(w, "bstr(")?; - items.as_slice().cdisplay(w)?; - write!(w, ")") - } - StringPart::Var(var_name) => { - write!(w, "var(")?; - var_name.name.as_slice().cdisplay(w)?; - write!(w, ")") - }, - StringPart::Cmd(ast) => { - write!(w, "cmdp(")?; - ast.cdisplay(w)?; - write!(w, ")") - } - } - } -} - -impl CmdDisplay for Command { - fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> { - write!(w, "cmd([")?; - self.cmd.cdisplay(w)?; - for arg in self.args.iter() { - write!(w, ", ")?; - arg.cdisplay(w)?; - } - write!(w, "])") - } -} - -impl CmdDisplay for &[u8] { - fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> { - write!(w, "b\"")?; - write!(w, "{}", self.escape_ascii())?; - write!(w, "\"") - } -} - -impl Ast { - pub fn expand(self, e: &mut E) -> Res, E::Error> { - match self { - Ast::VarAssign(va) => Ok(Ast::VarAssign(va.expand(e)?)), - Ast::Pipes(pipes) => Ok(Ast::Pipes(pipes.expand(e)?)), - Ast::FunDecl(fd) => Ok(Ast::FunDecl(fd.expand(e)?)), - } - } -} - -#[derive(Debug, Clone)] -pub struct FunBody { - pub body: Box>, -} - -impl Parse for FunBody { - fn parse(b: &mut Cursor<'_>) -> Result { - b.spaces(); - - if b.is_empty() { - return Err(ParseError::Eof); - } - - if b.peek() != b'{' { - return Err(ParseError::Expected('{')); - } - - b.adv(); - let body = Box::new(Ast::parse(b)?); - if b.is_empty() { - if b.is_completion() { - Ok(Self { body }) - } else { - Err(ParseError::Eof) - } - } else if b.peek() == b'}' { - Ok(Self { body }) - } else { - Err(ParseError::Expected('}')) - } - } -} - -#[derive(Debug, Clone)] -pub struct FunDecl { - pub name: S::Str, - pub body: FunBody, -} - -impl Parse for FunDecl { - fn parse(b: &mut Cursor<'_>) -> Result { - if !b.buf.starts_with(b"fun ") && !b.buf.starts_with(b"fun\t") { - return Err(ParseError::NotAFunDecl); - } - b.advance(4); - b.spaces(); - let name = ExpString::parse(b)?; - let body = FunBody::parse(b)?; - Ok(Self { name, body }) - } -} - -impl FunDecl { - fn expand(self, e: &mut E) -> Res, E::Error> { - Ok(FunDecl { - name: self.name.expand(e)?, - body: self.body, - }) - } -} - -#[derive(Debug, Clone)] -pub struct VarAssign { - pub var: S::Str, - pub val: S::Str, -} - -impl Parse for VarAssign { - fn parse(b: &mut Cursor<'_>) -> Result { - if !b.buf.starts_with(b"set ") && !b.buf.starts_with(b"set\t") { - return Err(ParseError::NotAVarAssign); - } - b.advance(4); - b.spaces(); - let var = ExpString::parse(b)?; - b.spaces(); - - if b.is_empty() { - return Err(ParseError::Eof); - } - let eq = b.adv(); - if eq != b'=' { - return Err(ParseError::Expected('=')); - } - let val = ExpString::parse(b)?; - - Ok(Self { var, val }) - } -} - -impl VarAssign { - fn expand(self, e: &mut E) -> Res, E::Error> { - Ok(VarAssign { - var: self.var.expand(e)?, - val: self.val.expand(e)?, - }) - } -} - -#[derive(Debug, Clone)] -pub struct Pipes { - pub cmds: Vec>, -} - -impl Pipes { - fn expand(self, e: &mut E) -> Res, E::Error> { - let mut cmds = Vec::with_capacity(self.cmds.len()); - for cmd in self.cmds.into_iter() { - cmds.push(cmd.expand(e)?); - } - Ok(Pipes { cmds }) - } -} - -#[derive(Debug, Clone)] -pub enum StringPart { - Boring(BString), - Var(VarName), - Cmd(Ast), -} - -impl StringPart { - pub fn is_boring(&self) -> bool { - matches!(self, StringPart::Boring(..)) - } - pub fn unwrap_boring(self) -> BString { - match self { - StringPart::Boring(items) => items, - _ => panic!("unwrap on non-boring value"), - } - } -} - -#[derive(Debug, Clone)] -/// `"hi ${var} $(cmd) "` gets mapped to `[Boring("hi "), Var("var"), String(" "), Cmd(...), Boring(" ")]` -pub struct ExpString { - parts: Vec, -} - -impl ExpString { - fn expand(self, e: &mut E) -> Res { - let mut out = BString::new(); - for part in self.parts.into_iter() { - let mut x = match part { - StringPart::Boring(items) => items, - StringPart::Var(v) => e.expand_var(v.name)?, - StringPart::Cmd(ast) => { - let exp = ast.expand(e)?; - e.expand_cmd(exp)? - } - }; - out.append(&mut x); - } - Ok(out) - } -} - -fn is_symbol(x: u8) -> bool { - match x { - b'|' | b'{' | b'}' | b'$' | b'(' | b')' | b'\'' | b'"' => true, - _ => false, - } -} - -fn is_var_begin(x: u8) -> bool { - x.is_ascii_alphabetic() -} -fn is_var_name(x: u8) -> bool { - x.is_ascii_alphanumeric() || x == b'_' -} - -#[derive(Debug, Clone)] -pub struct VarName { - name: BString, -} - -impl Parse for VarName { - fn parse(b: &mut Cursor<'_>) -> Result { - if b.is_empty() { - return Err(ParseError::Eof); - } - - if !is_var_begin(b.peek()) { - return Err(ParseError::ExpectedAlphabetic); - } - - let mut name = BString::new(); - while b.has() { - let x = b.peek(); - if is_var_name(x) { - b.adv(); - name.push(x) - } else { - break; - } - } - - Ok(Self { name }) - } -} - -impl Parse for ExpString { - fn parse(b: &mut Cursor<'_>) -> Result { - b.spaces(); - if b.is_empty() { - return Err(ParseError::NotAString); - } - - let mut delim = b.peek(); - if delim == b'\'' || delim == b'"' { - b.adv(); - } else if is_symbol(delim) && delim != b'$' { - return Err(ParseError::NotAString); - } else { - delim = b' '; - } - - let mut parts = Vec::new(); - let p = &mut parts; - let mut escaping = false; - - let add_char = |p: &mut Vec, x: u8| match p.last_mut() { - Some(StringPart::Boring(v)) => v.push(x), - _ => p.push(StringPart::Boring(vec![x])), - }; - - while b.has() { - let x = b.peek(); - - if escaping { - add_char(p, x); - escaping = false; - b.adv(); - continue; - } - - if x == delim || (b.peek_space() && delim == b' ') { - if delim != b' ' { - b.adv(); - } - return Ok(Self { parts }); - } - - if delim == b' ' && is_symbol(x) && x != b'$' { - return Ok(Self { parts }); - } - - b.adv(); - - if delim == b'\'' { - // no fancy stuff here - add_char(p, x); - continue; - } - - if x == b'\\' { - escaping = true; - continue; - } - - if x == b'$' { - if !b.has() { - add_char(p, x); - continue; - } - - let x = b.peek(); - - if x == b'?' || x == b'!' { - b.adv(); - p.push(StringPart::Var(VarName { name: vec![x] })) - } else if is_var_begin(x) { - let v = VarName::parse(b)?; - p.push(StringPart::Var(v)); - } else if x == b'{' { - b.adv(); - let v = VarName::parse(b)?; - - if !b.has() { - return Err(ParseError::Eof); - } else if b.peek() == b':' { - todo!(": in var expansion") - } - - if !b.has() { - return Err(ParseError::Eof); - } else if b.peek() != b'}' { - return Err(ParseError::Incomplete); - } - - b.adv(); - p.push(StringPart::Var(v)); - } else if x == b'(' { - b.adv(); - let cmd = Ast::parse(b)?; - b.spaces(); - if b.is_empty() { - return Err(ParseError::Eof); - } else if b.peek() == b')' { - p.push(StringPart::Cmd(cmd)); - } else { - return Err(ParseError::Expected(')')); - } - } else { - // doesn't seem to be a variable or expansion, just add $ back into the string - add_char(p, b'$'); - continue; - } - - continue; - } - - add_char(p, x); - } - - if b.is_completion() || delim == b' ' { - Ok(Self { parts }) - } else { - Err(ParseError::Eof) - } - } -} - -#[derive(Debug, Clone)] -pub struct Command { - pub cmd: T::Str, - pub args: Vec, -} - -impl Command { - fn expand(self, e: &mut E) -> Res, E::Error> { - let cmd = self.cmd.expand(e)?; - let mut args = Vec::with_capacity(self.args.len()); - for arg in self.args.into_iter() { - args.push(arg.expand(e)?); - } - Ok(Command { cmd, args }) - } -} - -#[allow(unused)] -#[derive(Debug)] -pub enum ParseError { - /// "clean" EOF, i.e. not in the middle of something - Eof, - - /// "unclean" EOF, i.e. EOF after beginning a quoted string - Incomplete, - - ExpectedAlphabetic, - - Unknown(u8), - - Expected(char), - - NotAString, - - NotAFunDecl, - - NotAVarAssign, -} - -type Result = std::result::Result; - -pub fn do_parse(x: &[u8]) -> Res, (ParseError, &[u8])> { - let mut c = Cursor::new(x, ParseMode::Command); - match Ast::parse(&mut c) { - Ok(ast) => Ok(ast), - Err(e) => Err((e, c.buf)), - } -} - -pub enum CompletionKind { - Command, - Argument, - None, -} - -pub struct CompletionContext { - pub kind: CompletionKind, - pub partial: BString, -} - -impl CompletionContext { - pub fn none() -> Self { - Self { - kind: CompletionKind::None, - partial: BString::new(), - } - } -} - -fn expstr_cc(s: &ExpString, kind: CompletionKind) -> CompletionContext { - if s.parts.len() > 1 || !s.parts[0].is_boring() { - CompletionContext::none() - } else { - CompletionContext { - kind, - partial: s.parts[0].clone().unwrap_boring().clone(), - } - } -} - -pub fn completion_context<'a>(x: &'a [u8]) -> CompletionContext { - let mut cursor = Cursor::new(x, ParseMode::Completion); - let ast = Ast::parse(&mut cursor); - match ast { - Ok(Ast::Pipes(pipes)) if cursor.spaced == false => { - if let Some(cmd) = pipes.cmds.last() { - if cmd.args.is_empty() { - expstr_cc(&cmd.cmd, CompletionKind::Command) - } else { - expstr_cc(&cmd.args[cmd.args.len() - 1], CompletionKind::Argument) - } - } else { - CompletionContext::none() - } - } - _ => CompletionContext::none(), - } -} - -trait Parse: Sized { - fn parse(b: &mut Cursor<'_>) -> Result; -} - -enum ParseMode { - Command, - Completion, -} - -struct Cursor<'a> { - buf: &'a [u8], - mode: ParseMode, - - /// if the last byte that was consumed was whitespace or part of a word - spaced: bool, - - backtrace: bool, -} - -impl<'a> Cursor<'a> { - fn new(buf: &'a [u8], mode: ParseMode) -> Self { - Self { - buf, - mode, - spaced: false, - backtrace: false, - } - } - - // non empty - fn has(&self) -> bool { - !self.buf.is_empty() - } - - fn is_empty(&self) -> bool { - self.buf.is_empty() - } - - fn bt(&self, word: &str) { - if self.backtrace { - let bt = std::backtrace::Backtrace::capture(); - let bt = format!("{bt}"); - println!("{word} {}\r", self.buf[0] as char); - for l in bt.lines().skip(4).take(2) { - println!("{l}\r"); - } - println!("\r"); - } - } - - fn peek(&self) -> u8 { - self.bt("peek"); - self.buf[0] - } - - fn adv(&mut self) -> u8 { - self.bt("adv"); - let out = self.buf[0]; - self.buf = &self.buf[1..]; - self.spaced = false; - out - } - - fn advance(&mut self, amt: usize) -> &[u8] { - self.bt(&format!("adv({amt})")); - let out = &self.buf[..amt]; - self.buf = &self.buf[amt..]; - self.spaced = false; - out - } - - fn peek_space(&self) -> bool { - if self.buf.is_empty() { - return false; - } - matches!(self.buf[0], b' ' | b'\t' | b'\n' | b'\r') - } - - fn spaces(&mut self) { - while self.peek_space() { - self.adv(); - self.spaced = true; - } - } - - fn is_completion(&self) -> bool { - match self.mode { - ParseMode::Completion => true, - _ => false, - } - } - - fn parse(&mut self) -> Result { - T::parse(self) - } -} - -impl Parse for Ast { - fn parse(b: &mut Cursor<'_>) -> Result { - b.spaces(); - - let orig_len = b.buf.len(); - let x = VarAssign::parse(b); - if let Ok(va) = x { - return Ok(Self::VarAssign(va)); - } else if b.buf.len() != orig_len { - x?; - } - - let orig_len = b.buf.len(); - let x = FunDecl::parse(b); - if let Ok(fd) = x { - return Ok(Self::FunDecl(fd)); - } else if b.buf.len() != orig_len { - x?; - } - - Ok(Self::Pipes(b.parse()?)) - } -} - -impl Parse for Command { - fn parse(b: &mut Cursor<'_>) -> Result { - let path: ExpString = b.parse()?; - let mut args = Vec::new(); - loop { - match ExpString::parse(b) { - Ok(arg) => args.push(arg), - Err(ParseError::NotAString) => break, - Err(e) => Err(e)?, - } - } - let x = Ok(Self { cmd: path, args }); - x - } -} - -impl Parse for Pipes { - fn parse(b: &mut Cursor<'_>) -> Result { - let mut cmds: Vec> = vec![b.parse()?]; - - loop { - b.spaces(); - if b.is_empty() { - return Ok(Pipes { cmds }); - } - - let c = b.peek(); - if c == b'|' { - b.adv(); - cmds.push(b.parse()?); - } else if is_symbol(c) { - return Ok(Pipes { cmds }); - } else { - Err(ParseError::Unknown(c))?; - } - } - } -} diff --git a/src/parse/mod.rs b/src/parse/mod.rs new file mode 100644 index 0000000..4f38f9b --- /dev/null +++ b/src/parse/mod.rs @@ -0,0 +1,777 @@ +use crate::BString; + +#[cfg(test)] +mod test; + +pub trait Stage : PartialEq { + type Str: std::fmt::Debug + Clone + PartialEq; +} + +pub trait CmdDisplay { + fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()>; +} + +#[derive(Debug, Clone, PartialEq)] +pub struct PreExpansion; +#[derive(Debug, Clone, PartialEq)] +pub struct PostExpansion; + +impl Stage for PreExpansion { + type Str = ExpString; +} + +impl Stage for PostExpansion { + type Str = BString; +} + +type Res = std::result::Result; + +pub trait Expander { + type Error; + fn expand_var(&mut self, v: BString) -> Res; + fn expand_cmd(&mut self, c: Ast) -> Res; +} + +#[derive(Debug, Clone, PartialEq)] +pub enum Ast { + FunDecl(FunDecl), + VarAssign(VarAssign), + Pipes(Pipes), +} + +pub fn decl(name: ExpString, body: Ast) -> Ast { + Ast::FunDecl(FunDecl { + name: name, + body: FunBody { + body: Box::new(body), + }, + }) +} + +pub fn assign(var: ExpString, val: ExpString) -> Ast { + Ast::VarAssign(VarAssign { var, val }) +} + +pub fn pipes(cmds: [Command; N]) -> Ast { + Ast::Pipes(Pipes { + cmds: cmds.to_vec(), + }) +} + +pub fn estr(x: &[u8]) -> ExpString { + ExpString { + parts: vec![StringPart::Boring(x.to_vec())], + } +} + +pub fn str(parts: [StringPart; N]) -> ExpString { + ExpString { + parts: parts.to_vec(), + } +} + +pub fn plain(x: &[u8]) -> StringPart { + StringPart::Boring(x.to_vec()) +} + +pub fn var(x: &[u8]) -> StringPart { + StringPart::Var(VarName { name: x.to_vec() }) +} + +pub fn cmdp(x: Ast) -> StringPart { + StringPart::Cmd(x) +} + +pub fn cmd(x: [ExpString; N]) -> Command { + Command { + cmd: x[0].clone(), + args: x[1..].to_vec(), + } +} + +impl CmdDisplay for Ast { + fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> { + match self { + Ast::FunDecl(fun_decl) => { + write!(w, "decl(")?; + fun_decl.name.cdisplay(w)?; + write!(w, ", ")?; + fun_decl.body.body.cdisplay(w)?; + write!(w, ")")?; + } + Ast::VarAssign(var_assign) => { + write!(w, "assign(")?; + var_assign.var.cdisplay(w)?; + write!(w, ", ")?; + var_assign.val.cdisplay(w)?; + write!(w, ")")?; + } + Ast::Pipes(pipes) => { + write!(w, "pipes([")?; + for cmd in pipes.cmds.iter() { + cmd.cdisplay(w)?; + write!(w, ",")?; + } + write!(w, "])")?; + } + } + Ok(()) + } +} + +impl CmdDisplay for ExpString { + fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> { + if self.parts.len() == 1 && self.parts[0].is_boring() { + write!( + w, + "estr(b\"{}\")", + self.parts[0].clone().unwrap_boring().escape_ascii() + ) + } else { + write!(w, "str([")?; + let mut first = true; + for part in self.parts.iter() { + if !first { + write!(w, ",")?; + } + first = false; + part.cdisplay(w)?; + } + write!(w, "])") + } + } +} + +impl CmdDisplay for StringPart { + fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> { + match self { + StringPart::Boring(items) => { + write!(w, "plain(")?; + items.as_slice().cdisplay(w)?; + write!(w, ")") + } + StringPart::Var(var_name) => { + write!(w, "var(")?; + var_name.name.as_slice().cdisplay(w)?; + write!(w, ")") + }, + StringPart::Cmd(ast) => { + write!(w, "cmdp(")?; + ast.cdisplay(w)?; + write!(w, ")") + } + } + } +} + +impl CmdDisplay for Command { + fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> { + write!(w, "cmd([")?; + self.cmd.cdisplay(w)?; + for arg in self.args.iter() { + write!(w, ", ")?; + arg.cdisplay(w)?; + } + write!(w, "])") + } +} + +impl CmdDisplay for &[u8] { + fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> { + write!(w, "b\"")?; + write!(w, "{}", self.escape_ascii())?; + write!(w, "\"") + } +} + +impl Ast { + pub fn expand(self, e: &mut E) -> Res, E::Error> { + match self { + Ast::VarAssign(va) => Ok(Ast::VarAssign(va.expand(e)?)), + Ast::Pipes(pipes) => Ok(Ast::Pipes(pipes.expand(e)?)), + Ast::FunDecl(fd) => Ok(Ast::FunDecl(fd.expand(e)?)), + } + } +} + +#[derive(Debug, Clone, PartialEq)] +pub struct FunBody { + pub body: Box>, +} + +impl Parse for FunBody { + fn parse(b: &mut Cursor<'_>) -> Result { + b.spaces(); + + if b.is_empty() { + return Err(ParseError::Eof); + } + + if b.peek() != b'{' { + return Err(ParseError::Expected('{')); + } + + b.adv(); + let body = Box::new(Ast::parse(b)?); + if b.is_empty() { + if b.is_completion() { + Ok(Self { body }) + } else { + Err(ParseError::Eof) + } + } else if b.peek() == b'}' { + Ok(Self { body }) + } else { + Err(ParseError::Expected('}')) + } + } +} + +#[derive(Debug, Clone, PartialEq)] +pub struct FunDecl { + pub name: S::Str, + pub body: FunBody, +} + +impl Parse for FunDecl { + fn parse(b: &mut Cursor<'_>) -> Result { + if !b.buf.starts_with(b"fun ") && !b.buf.starts_with(b"fun\t") { + return Err(ParseError::NotAFunDecl); + } + b.advance(4); + b.spaces(); + let name = ExpString::parse(b)?; + let body = FunBody::parse(b)?; + Ok(Self { name, body }) + } +} + +impl FunDecl { + fn expand(self, e: &mut E) -> Res, E::Error> { + Ok(FunDecl { + name: self.name.expand(e)?, + body: self.body, + }) + } +} + +#[derive(Debug, Clone, PartialEq)] +pub struct VarAssign { + pub var: S::Str, + pub val: S::Str, +} + +impl Parse for VarAssign { + fn parse(b: &mut Cursor<'_>) -> Result { + if !b.buf.starts_with(b"set ") && !b.buf.starts_with(b"set\t") { + return Err(ParseError::NotAVarAssign); + } + b.advance(4); + b.spaces(); + let var = ExpString::parse(b)?; + b.spaces(); + + if b.is_empty() { + return Err(ParseError::Eof); + } + let eq = b.adv(); + if eq != b'=' { + return Err(ParseError::Expected('=')); + } + let val = ExpString::parse(b)?; + + Ok(Self { var, val }) + } +} + +impl VarAssign { + fn expand(self, e: &mut E) -> Res, E::Error> { + Ok(VarAssign { + var: self.var.expand(e)?, + val: self.val.expand(e)?, + }) + } +} + +#[derive(Debug, Clone, PartialEq)] +pub struct Pipes { + pub cmds: Vec>, +} + +impl Pipes { + fn expand(self, e: &mut E) -> Res, E::Error> { + let mut cmds = Vec::with_capacity(self.cmds.len()); + for cmd in self.cmds.into_iter() { + cmds.push(cmd.expand(e)?); + } + Ok(Pipes { cmds }) + } +} + +#[derive(Debug, Clone, PartialEq)] +pub enum StringPart { + Boring(BString), + Var(VarName), + Cmd(Ast), +} + +impl StringPart { + pub fn is_boring(&self) -> bool { + matches!(self, StringPart::Boring(..)) + } + pub fn unwrap_boring(self) -> BString { + match self { + StringPart::Boring(items) => items, + _ => panic!("unwrap on non-boring value"), + } + } +} + +#[derive(Debug, Clone, PartialEq)] +/// `"hi ${var} $(cmd) "` gets mapped to `[Boring("hi "), Var("var"), String(" "), Cmd(...), Boring(" ")]` +pub struct ExpString { + parts: Vec, +} + +impl ExpString { + fn expand(self, e: &mut E) -> Res { + let mut out = BString::new(); + for part in self.parts.into_iter() { + let mut x = match part { + StringPart::Boring(items) => items, + StringPart::Var(v) => e.expand_var(v.name)?, + StringPart::Cmd(ast) => { + let exp = ast.expand(e)?; + e.expand_cmd(exp)? + } + }; + out.append(&mut x); + } + Ok(out) + } +} + +fn is_symbol(x: u8) -> bool { + match x { + b'|' | b'{' | b'}' | b'$' | b'(' | b')' | b'\'' | b'"' => true, + _ => false, + } +} + +fn is_var_begin(x: u8) -> bool { + x.is_ascii_alphabetic() +} +fn is_var_name(x: u8) -> bool { + x.is_ascii_alphanumeric() || x == b'_' +} + +#[derive(Debug, Clone, PartialEq)] +pub struct VarName { + name: BString, +} + +impl Parse for VarName { + fn parse(b: &mut Cursor<'_>) -> Result { + if b.is_empty() { + return Err(ParseError::Eof); + } + + if !is_var_begin(b.peek()) { + return Err(ParseError::ExpectedAlphabetic); + } + + let mut name = BString::new(); + while b.has() { + let x = b.peek(); + if is_var_name(x) { + b.adv(); + name.push(x) + } else { + break; + } + } + + Ok(Self { name }) + } +} + +impl Parse for ExpString { + fn parse(b: &mut Cursor<'_>) -> Result { + b.spaces(); + if b.is_empty() { + return Err(ParseError::NotAString); + } + + let mut delim = b.peek(); + if delim == b'\'' || delim == b'"' { + b.adv(); + } else if is_symbol(delim) && delim != b'$' { + return Err(ParseError::NotAString); + } else { + delim = b' '; + } + + let mut parts = Vec::new(); + let p = &mut parts; + let mut escaping = false; + + let add_char = |p: &mut Vec, x: u8| match p.last_mut() { + Some(StringPart::Boring(v)) => v.push(x), + _ => p.push(StringPart::Boring(vec![x])), + }; + + while b.has() { + let x = b.peek(); + + if escaping { + add_char(p, x); + escaping = false; + b.adv(); + continue; + } + + if x == delim || (b.peek_space() && delim == b' ') { + if delim != b' ' { + b.adv(); + } + return Ok(Self { parts }); + } + + if delim == b' ' && is_symbol(x) && x != b'$' { + return Ok(Self { parts }); + } + + b.adv(); + + if delim == b'\'' { + // no fancy stuff here + add_char(p, x); + continue; + } + + if x == b'\\' { + escaping = true; + continue; + } + + if x == b'$' { + if !b.has() { + add_char(p, x); + continue; + } + + let x = b.peek(); + + if x == b'?' || x == b'!' { + b.adv(); + p.push(StringPart::Var(VarName { name: vec![x] })) + } else if is_var_begin(x) { + let v = VarName::parse(b)?; + p.push(StringPart::Var(v)); + } else if x == b'{' { + b.adv(); + let v = VarName::parse(b)?; + + if !b.has() { + return Err(ParseError::Eof); + } else if b.peek() == b':' { + todo!(": in var expansion") + } + + if !b.has() { + return Err(ParseError::Eof); + } else if b.peek() != b'}' { + return Err(ParseError::Incomplete); + } + + b.adv(); + p.push(StringPart::Var(v)); + } else if x == b'(' { + b.adv(); + let cmd = Ast::parse(b)?; + b.spaces(); + if b.is_empty() { + return Err(ParseError::Eof); + } else if b.peek() == b')' { + p.push(StringPart::Cmd(cmd)); + } else { + return Err(ParseError::Expected(')')); + } + } else { + // doesn't seem to be a variable or expansion, just add $ back into the string + add_char(p, b'$'); + continue; + } + + continue; + } + + add_char(p, x); + } + + if b.is_completion() || delim == b' ' { + Ok(Self { parts }) + } else { + Err(ParseError::Eof) + } + } +} + +#[derive(Debug, Clone, PartialEq)] +pub struct Command { + pub cmd: T::Str, + pub args: Vec, +} + +impl Command { + fn expand(self, e: &mut E) -> Res, E::Error> { + let cmd = self.cmd.expand(e)?; + let mut args = Vec::with_capacity(self.args.len()); + for arg in self.args.into_iter() { + args.push(arg.expand(e)?); + } + Ok(Command { cmd, args }) + } +} + +#[allow(unused)] +#[derive(Debug)] +pub enum ParseError { + /// "clean" EOF, i.e. not in the middle of something + Eof, + + /// "unclean" EOF, i.e. EOF after beginning a quoted string + Incomplete, + + ExpectedAlphabetic, + + Unknown(u8), + + Expected(char), + + NotAString, + + NotAFunDecl, + + NotAVarAssign, +} + +type Result = std::result::Result; + +pub fn do_parse(x: &[u8]) -> Res, (ParseError, &[u8])> { + let mut c = Cursor::new(x, ParseMode::Command); + match Ast::parse(&mut c) { + Ok(ast) => Ok(ast), + Err(e) => Err((e, c.buf)), + } +} + +pub enum CompletionKind { + Command, + Argument, + None, +} + +pub struct CompletionContext { + pub kind: CompletionKind, + pub partial: BString, +} + +impl CompletionContext { + pub fn none() -> Self { + Self { + kind: CompletionKind::None, + partial: BString::new(), + } + } +} + +fn expstr_cc(s: &ExpString, kind: CompletionKind) -> CompletionContext { + if s.parts.len() > 1 || !s.parts[0].is_boring() { + CompletionContext::none() + } else { + CompletionContext { + kind, + partial: s.parts[0].clone().unwrap_boring().clone(), + } + } +} + +pub fn completion_context<'a>(x: &'a [u8]) -> CompletionContext { + let mut cursor = Cursor::new(x, ParseMode::Completion); + let ast = Ast::parse(&mut cursor); + match ast { + Ok(Ast::Pipes(pipes)) if cursor.spaced == false => { + if let Some(cmd) = pipes.cmds.last() { + if cmd.args.is_empty() { + expstr_cc(&cmd.cmd, CompletionKind::Command) + } else { + expstr_cc(&cmd.args[cmd.args.len() - 1], CompletionKind::Argument) + } + } else { + CompletionContext::none() + } + } + _ => CompletionContext::none(), + } +} + +trait Parse: Sized { + fn parse(b: &mut Cursor<'_>) -> Result; +} + +enum ParseMode { + Command, + Completion, +} + +struct Cursor<'a> { + buf: &'a [u8], + mode: ParseMode, + + /// if the last byte that was consumed was whitespace or part of a word + spaced: bool, + + backtrace: bool, +} + +impl<'a> Cursor<'a> { + fn new(buf: &'a [u8], mode: ParseMode) -> Self { + Self { + buf, + mode, + spaced: false, + backtrace: false, + } + } + + // non empty + fn has(&self) -> bool { + !self.buf.is_empty() + } + + fn is_empty(&self) -> bool { + self.buf.is_empty() + } + + fn bt(&self, word: &str) { + if self.backtrace { + let bt = std::backtrace::Backtrace::capture(); + let bt = format!("{bt}"); + println!("{word} {}\r", self.buf[0] as char); + for l in bt.lines().skip(4).take(2) { + println!("{l}\r"); + } + println!("\r"); + } + } + + fn peek(&self) -> u8 { + self.bt("peek"); + self.buf[0] + } + + fn adv(&mut self) -> u8 { + self.bt("adv"); + let out = self.buf[0]; + self.buf = &self.buf[1..]; + self.spaced = false; + out + } + + fn advance(&mut self, amt: usize) -> &[u8] { + self.bt(&format!("adv({amt})")); + let out = &self.buf[..amt]; + self.buf = &self.buf[amt..]; + self.spaced = false; + out + } + + fn peek_space(&self) -> bool { + if self.buf.is_empty() { + return false; + } + matches!(self.buf[0], b' ' | b'\t' | b'\n' | b'\r') + } + + fn spaces(&mut self) { + while self.peek_space() { + self.adv(); + self.spaced = true; + } + } + + fn is_completion(&self) -> bool { + match self.mode { + ParseMode::Completion => true, + _ => false, + } + } + + fn parse(&mut self) -> Result { + T::parse(self) + } +} + +impl Parse for Ast { + fn parse(b: &mut Cursor<'_>) -> Result { + b.spaces(); + + let orig_len = b.buf.len(); + let x = VarAssign::parse(b); + if let Ok(va) = x { + return Ok(Self::VarAssign(va)); + } else if b.buf.len() != orig_len { + x?; + } + + let orig_len = b.buf.len(); + let x = FunDecl::parse(b); + if let Ok(fd) = x { + return Ok(Self::FunDecl(fd)); + } else if b.buf.len() != orig_len { + x?; + } + + Ok(Self::Pipes(b.parse()?)) + } +} + +impl Parse for Command { + fn parse(b: &mut Cursor<'_>) -> Result { + let path: ExpString = b.parse()?; + let mut args = Vec::new(); + loop { + match ExpString::parse(b) { + Ok(arg) => args.push(arg), + Err(ParseError::NotAString) => break, + Err(e) => Err(e)?, + } + } + let x = Ok(Self { cmd: path, args }); + x + } +} + +impl Parse for Pipes { + fn parse(b: &mut Cursor<'_>) -> Result { + let mut cmds: Vec> = vec![b.parse()?]; + + loop { + b.spaces(); + if b.is_empty() { + return Ok(Pipes { cmds }); + } + + let c = b.peek(); + if c == b'|' { + b.adv(); + cmds.push(b.parse()?); + } else if is_symbol(c) { + return Ok(Pipes { cmds }); + } else { + Err(ParseError::Unknown(c))?; + } + } + } +} diff --git a/src/parse/test.rs b/src/parse/test.rs new file mode 100644 index 0000000..6cd7793 --- /dev/null +++ b/src/parse/test.rs @@ -0,0 +1,16 @@ +use super::*; + +fn parse(x: &[u8]) -> Ast { + do_parse(x).unwrap() +} + +#[test] +fn command_interp() { + assert_eq!( + parse(br#"echo "$(echo hi)""#), + pipes([cmd([ + estr(b"echo"), + str([cmdp(pipes([cmd([estr(b"echo"), estr(b"hi")]),])),]) + ]),]) + ); +} -- cgit v1.2.3