// AST, parser, expansion and tab-completion helpers for a small shell-like
// command language: `fun NAME { ... }`, `set VAR = VALUE`, and `|`-separated
// commands whose words may contain quotes, backslash escapes, `$VAR`,
// `${VAR:-default}`, `$(command)` and `~`.
//
// NOTE(review): in this listing several generic parameter lists appear to be
// missing (e.g. `Box>`, `Vec>`, `Res, E::Error>`, `[Command; N]` without a
// declared `N`). The code below is kept token-for-token as found; confirm
// against the real file before relying on any signature shown here.
use crate::BString;

// Unit tests live in a sibling `test` module that is only compiled for tests.
#[cfg(test)]
mod test;

/// Marker for a phase of the AST; `Str` is the string representation used by
/// that phase (see the impls for `PreExpansion` and `PostExpansion`).
pub trait Stage: PartialEq {
    type Str: std::fmt::Debug + Clone + PartialEq;
}

/// Writes a textual form of a value built from the constructor helper names
/// defined in this file (`decl(...)`, `assign(...)`, `pipes([...])`,
/// `cmd([...])`, `estr(...)`, `str([...])`, `plain(...)`, `var(...)`,
/// `var_default(...)`, `cmdp(...)`).
pub trait CmdDisplay {
    /// Writes the representation of `self` to `w`, propagating I/O errors.
    fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()>;
}

/// Stage marker whose string type is the unexpanded `ExpString`.
#[derive(Debug, Clone, PartialEq)]
pub struct PreExpansion;

/// Stage marker whose string type is the plain byte string `BString`.
#[derive(Debug, Clone, PartialEq)]
pub struct PostExpansion;

impl Stage for PreExpansion {
    type Str = ExpString;
}

impl Stage for PostExpansion {
    type Str = BString;
}

/// Shorthand for `std::result::Result`, used by the expansion API.
type Res = std::result::Result;

/// Callbacks that supply the values substituted during `expand`.
pub trait Expander {
    /// Error type produced by the callbacks and propagated by `expand`.
    type Error;
    /// Resolves the variable named `v`; `default` is the already expanded
    /// `${v:-default}` fallback when one was written.
    fn expand_var(&mut self, v: BString, default: Option) -> Res;
    /// Resolves a `$(...)` command substitution; `c` has already been expanded.
    fn expand_cmd(&mut self, c: Ast) -> Res;
}

/// Root node of a parsed input: a function declaration, a variable
/// assignment, or a pipeline of commands.
#[derive(Debug, Clone, PartialEq)]
pub enum Ast {
    FunDecl(FunDecl),
    VarAssign(VarAssign),
    Pipes(Pipes),
}

/// Builds an `Ast::FunDecl` named `name` whose body is `body`.
pub fn decl(name: ExpString, body: Ast) -> Ast {
    Ast::FunDecl(FunDecl {
        name: name,
        body: FunBody {
            body: Box::new(body),
        },
    })
}

/// Builds an `Ast::VarAssign` assigning `val` to `var`.
pub fn assign(var: ExpString, val: ExpString) -> Ast {
    Ast::VarAssign(VarAssign { var, val })
}

/// Builds an `Ast::Pipes` from the given commands, in order.
pub fn pipes(cmds: [Command; N]) -> Ast {
    Ast::Pipes(Pipes {
        cmds: cmds.to_vec(),
    })
}

/// Builds an `ExpString` consisting of a single literal (`Boring`) part.
pub fn estr(x: &[u8]) -> ExpString {
    ExpString {
        parts: vec![StringPart::Boring(x.to_vec())],
        delim: b' ',
    }
}

/// Builds an `ExpString` from explicit parts, with `delim` set to a space.
pub fn str(parts: [StringPart; N]) -> ExpString {
    ExpString {
        parts: parts.to_vec(),
        delim: b' ',
    }
}

/// Builds a literal (`Boring`) string part.
pub fn plain(x: &[u8]) -> StringPart {
    StringPart::Boring(x.to_vec())
}

/// Builds a variable part without a default, marked `already_complete`.
pub fn var(x: &[u8]) -> StringPart {
    StringPart::Var(Var {
        name: VarName { name: x.to_vec() },
        default: None,
        already_complete: true,
    })
}

/// Builds a variable part with a default value, marked `already_complete`.
pub fn var_default(x: &[u8], default: ExpString) -> StringPart {
    StringPart::Var(Var {
        name: VarName { name: x.to_vec() },
        default: Some(default),
        already_complete: true,
    })
}

/// Builds a command-substitution part wrapping `x`.
pub fn cmdp(x: Ast) -> StringPart {
    StringPart::Cmd(x)
}

/// Builds a `Command` whose name is `x[0]` and whose arguments are the
/// remaining elements.
///
/// Indexes `x[0]`, so at least one element is required.
pub fn cmd(x: [ExpString; N]) -> Command {
    Command {
        cmd: x[0].clone(),
        args: x[1..].to_vec(),
    }
}

// Writes `decl(name, body)`, `assign(var, val)` or `pipes([cmd,cmd,])`.
impl CmdDisplay for Ast {
    fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> {
        match self {
            Ast::FunDecl(fun_decl) => {
                write!(w, "decl(")?;
                fun_decl.name.cdisplay(w)?;
                write!(w, ", ")?;
                fun_decl.body.body.cdisplay(w)?;
                write!(w, ")")?;
            }
            Ast::VarAssign(var_assign) => {
                write!(w, "assign(")?;
                var_assign.var.cdisplay(w)?;
                write!(w, ", ")?;
                var_assign.val.cdisplay(w)?;
                write!(w, ")")?;
            }
            Ast::Pipes(pipes) => {
                write!(w, "pipes([")?;
                // Every command is followed by a comma, including the last one.
                for cmd in pipes.cmds.iter() {
                    cmd.cdisplay(w)?;
                    write!(w, ",")?;
                }
                write!(w, "])")?;
            }
        }
        Ok(())
    }
}

// Writes `estr(b"...")` for a single literal part, otherwise `str([part,part])`.
impl CmdDisplay for ExpString {
    fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> {
        if self.parts.len() == 1 && self.parts[0].is_boring() {
            write!(
                w,
                "estr(b\"{}\")",
                self.parts[0].clone().unwrap_boring().escape_ascii()
            )
        } else {
            write!(w, "str([")?;
            // Comma-separate the parts without a trailing comma.
            let mut first = true;
            for part in self.parts.iter() {
                if !first {
                    write!(w, ",")?;
                }
                first = false;
                part.cdisplay(w)?;
            }
            write!(w, "])")
        }
    }
}

// Writes `plain(b"...")`, `var(b"...")`, `var_default(b"...",<default>)` or
// `cmdp(<ast>)`.
impl CmdDisplay for StringPart {
    fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> {
        match self {
            StringPart::Boring(items) => {
                write!(w, "plain(")?;
                items.as_slice().cdisplay(w)?;
                write!(w, ")")
            }
            StringPart::Var(var) => {
                if let Some(default) = &var.default {
                    write!(w, "var_default(")?;
                    var.name.name.as_slice().cdisplay(w)?;
                    write!(w, ",")?;
                    default.cdisplay(w)?;
                    write!(w, ")")
                } else {
                    write!(w, "var(")?;
                    var.name.name.as_slice().cdisplay(w)?;
                    write!(w, ")")
                }
            }
            StringPart::Cmd(ast) => {
                write!(w, "cmdp(")?;
                ast.cdisplay(w)?;
                write!(w, ")")
            }
        }
    }
}

// Writes `cmd([name, arg, arg])`.
impl CmdDisplay for Command {
    fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> {
        write!(w, "cmd([")?;
        self.cmd.cdisplay(w)?;
        for arg in self.args.iter() {
            write!(w, ", ")?;
            arg.cdisplay(w)?;
        }
        write!(w, "])")
    }
}

// Writes the bytes as `b"..."`, escaped with `escape_ascii`.
impl CmdDisplay for &[u8] {
    fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> {
        write!(w, "b\"")?;
        write!(w, "{}", self.escape_ascii())?;
        write!(w, "\"")
    }
}

impl Ast {
    /// Expands this node by delegating to the `expand` method of the wrapped
    /// variant; the first error reported by `e` is returned.
    pub fn expand(self, e: &mut E) -> Res, E::Error> {
        match self {
            Ast::VarAssign(va) => Ok(Ast::VarAssign(va.expand(e)?)),
            Ast::Pipes(pipes) => Ok(Ast::Pipes(pipes.expand(e)?)),
            Ast::FunDecl(fd) => Ok(Ast::FunDecl(fd.expand(e)?)),
        }
    }
}

/// Body of a function declaration: the boxed `Ast` between `{` and `}`.
#[derive(Debug, Clone, PartialEq)]
pub struct FunBody {
    pub body: Box>,
}

// Parses `{ <ast> }`.
impl Parse for FunBody {
    fn parse(b: &mut Cursor<'_>) -> Result {
        b.spaces();
        if b.is_empty() {
            return Err(ParseError::Eof);
        }
        if b.peek() != b'{' {
            return Err(ParseError::Expected('{'));
        }
        b.adv();
        let body = Box::new(Ast::parse(b)?);
        b.spaces();
        if b.is_empty() {
            // A missing closing brace is accepted only in completion mode.
            if b.is_completion() {
                Ok(Self { body })
            } else {
                Err(ParseError::Eof)
            }
        } else if b.peek() == b'}' {
            // NOTE(review): the closing `}` is only peeked here, not consumed,
            // so it is still in the buffer for the caller — confirm intended.
            Ok(Self { body })
        } else {
            Err(ParseError::Expected('}'))
        }
    }
}

/// Function declaration: `fun <name> { <body> }`.
#[derive(Debug, Clone, PartialEq)]
pub struct FunDecl {
    pub name: S::Str,
    pub body: FunBody,
}

// Parses `fun` followed by a space or tab, then a name and a `FunBody`.
impl Parse for FunDecl {
    fn parse(b: &mut Cursor<'_>) -> Result {
        // Reject without consuming anything so `Ast::parse` can try the next form.
        if !b.buf.starts_with(b"fun ") && !b.buf.starts_with(b"fun\t") {
            return Err(ParseError::NotAFunDecl);
        }
        // Skip the keyword and the whitespace byte that follows it.
        b.advance(4);
        b.spaces();
        let name = ExpString::parse(b)?;
        let body = FunBody::parse(b)?;
        Ok(Self { name, body })
    }
}

impl FunDecl {
    /// Expands the name only; the body is carried over unchanged.
    fn expand(self, e: &mut E) -> Res, E::Error> {
        Ok(FunDecl {
            name: self.name.expand(e)?,
            body: self.body,
        })
    }
}

/// Variable assignment: `set <var> = <val>`.
#[derive(Debug, Clone, PartialEq)]
pub struct VarAssign {
    pub var: S::Str,
    pub val: S::Str,
}

// Parses `set` followed by a space or tab, then `<var>`, `=` and `<val>`.
impl Parse for VarAssign {
    fn parse(b: &mut Cursor<'_>) -> Result {
        // Reject without consuming anything so `Ast::parse` can try the next form.
        if !b.buf.starts_with(b"set ") && !b.buf.starts_with(b"set\t") {
            return Err(ParseError::NotAVarAssign);
        }
        // Skip the keyword and the whitespace byte that follows it.
        b.advance(4);
        b.spaces();
        let var = ExpString::parse(b)?;
        b.spaces();
        if b.is_empty() {
            return Err(ParseError::Eof);
        }
        // The byte is consumed even when it turns out not to be `=`.
        let eq = b.adv();
        if eq != b'=' {
            return Err(ParseError::Expected('='));
        }
        let val = ExpString::parse(b)?;
        Ok(Self { var, val })
    }
}

impl VarAssign {
    /// Expands the variable name first, then the value.
    fn expand(self, e: &mut E) -> Res, E::Error> {
        Ok(VarAssign {
            var: self.var.expand(e)?,
            val: self.val.expand(e)?,
        })
    }
}

/// One or more commands separated by `|`.
#[derive(Debug, Clone, PartialEq)]
pub struct Pipes {
    pub cmds: Vec>,
}

impl Pipes {
    /// Expands each command in order, stopping at the first error.
    fn expand(self, e: &mut E) -> Res, E::Error> {
        let mut cmds = Vec::with_capacity(self.cmds.len());
        for cmd in self.cmds.into_iter() {
            cmds.push(cmd.expand(e)?);
        }
        Ok(Pipes { cmds })
    }
}

/// One piece of an `ExpString`: literal bytes, a variable reference, or a
/// command substitution.
#[derive(Debug, Clone, PartialEq)]
pub enum StringPart {
    Boring(BString),
    Var(Var),
    Cmd(Ast),
}

/// A variable reference inside an `ExpString`.
#[derive(Debug, Clone, PartialEq)]
pub struct Var {
    name: VarName,
    default: Option,
    /// Whether pressing tab right after the parsed variable should NOT try to
    /// complete the variable name.
    ///
    /// i.e. `${HOM}` -> true, `$HOM` -> false, `${HOM` -> false
    already_complete: bool,
}

impl Var {
    /// Creates a variable reference with no default and
    /// `already_complete == false`.
    pub fn new(name: VarName) -> Self {
        Self {
            name,
            default: None,
            already_complete: false,
        }
    }
}

impl StringPart {
    /// Returns `true` for the `Boring` (literal) variant.
    pub fn is_boring(&self) -> bool {
        matches!(self, StringPart::Boring(..))
    }
    /// Returns the literal bytes of a `Boring` part.
    ///
    /// # Panics
    /// Panics if `self` is any other variant.
    pub fn unwrap_boring(self) -> BString {
        match self {
            StringPart::Boring(items) => items,
            _ => panic!("unwrap on non-boring value"),
        }
    }
}

#[derive(Debug, Clone, PartialEq)]
/// A string that still contains unexpanded parts.
///
/// `"hi ${var} $(cmd) "` gets mapped to
/// `[Boring("hi "), Var("var"), Boring(" "), Cmd(...), Boring(" ")]`
pub struct ExpString {
    parts: Vec,
    delim: u8,
}

impl ExpString {
    /// Expands every part in order and concatenates the results into a single
    /// byte string. Literal parts are copied, variables (with their expanded
    /// default, if any) go through `expand_var`, and command substitutions are
    /// expanded and then passed to `expand_cmd`.
    fn expand(self, e: &mut E) -> Res {
        let mut out = BString::new();
        for part in self.parts.into_iter() {
            let mut x = match part {
                StringPart::Boring(items) => items,
                StringPart::Var(v) => {
                    // The default is expanded whether or not it ends up being used.
                    let default = match v.default {
                        Some(default) => Some(default.expand(e)?),
                        None => None,
                    };
                    e.expand_var(v.name.name, default)?
                }
                StringPart::Cmd(ast) => {
                    let exp = ast.expand(e)?;
                    e.expand_cmd(exp)?
                }
            };
            out.append(&mut x);
        }
        Ok(out)
    }
}

/// Returns `true` for bytes that have syntactic meaning to the parser:
/// `;`, `|`, `{`, `}`, `$`, `(`, `)`, `'` and `"`.
fn is_symbol(x: u8) -> bool {
    match x {
        b';' | b'|' | b'{' | b'}' | b'$' | b'(' | b')' | b'\'' | b'"' => true,
        _ => false,
    }
}

/// Returns `true` if `x` may start a variable name (ASCII letter or digit).
// NOTE(review): `_` is accepted by `is_var_name` but not here, so a name such
// as `$_x` is not recognised as a variable — confirm this is intended.
fn is_var_begin(x: u8) -> bool {
    x.is_ascii_alphanumeric()
}

/// Returns `true` if `x` may appear inside a variable name (ASCII letter,
/// digit or `_`).
fn is_var_name(x: u8) -> bool {
    x.is_ascii_alphanumeric() || x == b'_'
}

/// Name of a variable, as raw bytes.
#[derive(Debug, Clone, PartialEq)]
pub struct VarName {
    name: BString,
}

// Parses either a run of ASCII digits, or a run of `is_var_name` bytes whose
// first byte satisfies `is_var_begin`.
impl Parse for VarName {
    fn parse(b: &mut Cursor<'_>) -> Result {
        if b.is_empty() {
            return Err(ParseError::Eof);
        }
        let mut name = BString::new();
        // A name that starts with a digit consists of digits only.
        if b.peek().is_ascii_digit() {
            while b.has() && b.peek().is_ascii_digit() {
                name.push(b.adv());
            }
            return Ok(Self { name });
        }
        if !is_var_begin(b.peek()) {
            return Err(ParseError::ExpectedAlphabetic);
        }
        while b.has() {
            let x = b.peek();
            if is_var_name(x) {
                b.adv();
                name.push(x)
            } else {
                break;
            }
        }
        Ok(Self { name })
    }
}

// Parses one word. A word is a sequence of adjacent segments, each either
// unquoted, single-quoted or double-quoted.
//
// State used by the loops below:
// - `delim`: the quote byte of the current segment, or `b' '` when unquoted.
// - `escaping`: the previous byte was a backslash.
// - `already_parsed`: the word is in a state where it may end successfully.
impl Parse for ExpString {
    fn parse(b: &mut Cursor<'_>) -> Result {
        b.spaces();
        if b.is_empty() {
            return Err(ParseError::NotAString);
        }
        let mut parts = Vec::new();
        let p = &mut parts;
        let mut escaping = false;
        // Appends a literal byte, extending the trailing `Boring` part when
        // there is one instead of starting a new part.
        let add_char = |p: &mut Vec, x: u8| match p.last_mut() {
            Some(StringPart::Boring(v)) => v.push(x),
            _ => p.push(StringPart::Boring(vec![x])),
        };
        let mut already_parsed = false;
        // Each iteration of the outer loop starts a new segment.
        'cont: while b.has() {
            let mut delim = b.peek();
            if delim == b'\'' || delim == b'"' {
                // Quoted segment: consume the opening quote.
                b.adv();
            } else if is_symbol(delim) && delim != b'$' && delim != b'\\' {
                // Any other symbol ends the word; it is left unconsumed.
                return if already_parsed {
                    Ok(Self { parts, delim })
                } else {
                    Err(ParseError::NotAString)
                };
            } else {
                // Unquoted segment.
                delim = b' ';
            }
            already_parsed = false;
            while b.has() {
                let x = b.peek();
                if escaping {
                    // Byte following a backslash: translate known escapes and
                    // take anything else literally.
                    let x = match x {
                        b'n' => b'\n',
                        b'r' => b'\r',
                        b't' => b'\t',
                        b'e' => 0x1b, // escape
                        b'x' => {
                            // parse two hex digits
                            b.adv();
                            if b.buf.len() < 2 {
                                Err(ParseError::Eof)?;
                            }
                            let x1 = b.peek();
                            b.adv();
                            // The second digit is consumed by the shared
                            // `b.adv()` after this `match`.
                            let x2 = b.peek();
                            if !x1.is_ascii_hexdigit() || !x2.is_ascii_hexdigit() {
                                Err(ParseError::NotHexDigit)?;
                            }
                            let x1 = (x1 as char).to_digit(16).unwrap_or(0);
                            let x2 = (x2 as char).to_digit(16).unwrap_or(0);
                            ((x1 << 4) | x2) as u8
                        }
                        _ => x,
                    };
                    add_char(p, x);
                    escaping = false;
                    already_parsed = true;
                    b.adv();
                    continue;
                }
                // Unquoted segment: whitespace or a symbol other than `$` ends it.
                if delim == b' ' && (x.is_ascii_whitespace() || (is_symbol(x) && x != b'$')) {
                    if x == b'\'' || x == b'"' {
                        // A quote directly after unquoted text starts a new
                        // segment of the same word (handled after this loop).
                        break;
                    } else {
                        return Ok(Self { parts, delim });
                    }
                }
                // Closing quote: consume it and look for a following segment.
                if x == delim {
                    b.adv();
                    already_parsed = true;
                    continue 'cont;
                }
                b.adv();
                if delim == b'\'' {
                    // no fancy stuff here
                    add_char(p, x);
                    continue;
                }
                if x == b'\\' {
                    escaping = true;
                    continue;
                }
                if x == b'$' {
                    // A `$` at the very end of the input is a literal `$`.
                    if !b.has() {
                        add_char(p, x);
                        continue;
                    }
                    let x = b.peek();
                    if x == b'?' || x == b'!' {
                        // Single-byte variable names: `$?` and `$!`.
                        b.adv();
                        p.push(StringPart::Var(Var::new(VarName { name: vec![x] })))
                    } else if is_var_begin(x) {
                        // `$NAME`
                        let v = VarName::parse(b)?;
                        p.push(StringPart::Var(Var::new(v)));
                    } else if x == b'{' {
                        // `${NAME}` or `${NAME:-default}`
                        b.adv();
                        let v = VarName::parse(b)?;
                        let mut default = None;
                        if !b.has() {
                            // An unterminated `${NAME` is accepted only in
                            // completion mode.
                            if !b.is_completion() {
                                return Err(ParseError::Eof);
                            }
                        } else if b.peek() == b':' {
                            b.adv();
                            if !b.has() {
                                return Err(ParseError::Eof);
                            }
                            if b.peek() == b'-' {
                                b.adv();
                                default = Some(ExpString::parse(b)?);
                            } else {
                                // NOTE(review): any `${NAME:` form other than
                                // `:-` reaches this `todo!` and panics.
                                todo!(": in var expansion")
                            }
                        }
                        if !b.has() {
                            if !b.is_completion() {
                                return Err(ParseError::Eof);
                            }
                        } else if b.peek() != b'}' {
                            return Err(ParseError::Expected('}'));
                        }
                        // Input remaining here means the byte is `}`; consume it.
                        let already_complete = b.has();
                        if already_complete {
                            b.adv();
                        }
                        p.push(StringPart::Var(Var {
                            name: v,
                            default,
                            already_complete,
                        }));
                    } else if x == b'(' {
                        // `$(command)`
                        b.adv();
                        let cmd = Ast::parse(b)?;
                        b.spaces();
                        if b.is_empty() {
                            return Err(ParseError::Eof);
                        } else if b.peek() == b')' {
                            b.adv();
                            p.push(StringPart::Cmd(cmd));
                        } else {
                            return Err(ParseError::Expected(')'));
                        }
                    } else {
                        // doesn't seem to be a variable or expansion, just add $ back into the string
                        add_char(p, b'$');
                        continue;
                    }
                    if delim == b' ' {
                        already_parsed = true;
                    }
                    continue;
                }
                if delim == b' ' && x == b'~' {
                    // Any `~` in an unquoted segment becomes a reference to `HOME`.
                    p.push(StringPart::Var(Var {
                        name: VarName {
                            name: b"HOME".to_vec(),
                        },
                        default: None,
                        already_complete: true,
                    }));
                } else {
                    add_char(p, x);
                }
                if delim == b' ' {
                    already_parsed = true;
                }
            }
            // The inner loop stopped at a quote: parse it as the next segment.
            if b.has() && b"\"'".contains(&b.peek()) {
                continue;
            }
            break;
        }
        // Input ran out: in completion mode a partial word is still returned.
        if b.is_completion() || already_parsed {
            Ok(Self { parts, delim: b' ' })
        } else {
            Err(ParseError::Eof)
        }
    }
}

/// A command name followed by its arguments.
#[derive(Debug, Clone, PartialEq)]
pub struct Command {
    pub cmd: T::Str,
    pub args: Vec,
}

impl Command {
    /// Expands the command name, then each argument in order, stopping at the
    /// first error.
    fn expand(self, e: &mut E) -> Res, E::Error> {
        let cmd = self.cmd.expand(e)?;
        let mut args = Vec::with_capacity(self.args.len());
        for arg in self.args.into_iter() {
            args.push(arg.expand(e)?);
        }
        Ok(Command { cmd, args })
    }
}

/// Errors produced by the parser.
#[allow(unused)]
#[derive(Debug)]
pub enum ParseError {
    /// "clean" EOF, i.e. not in the middle of something
    Eof,
    /// "unclean" EOF, i.e. EOF after beginning a quoted string
    // NOTE(review): not constructed anywhere in the code visible here.
    Incomplete,
    /// Returned by `VarName::parse` when the first byte fails `is_var_begin`.
    ExpectedAlphabetic,
    /// Returned by `Pipes::parse` for a byte that is neither `|` nor a symbol.
    Unknown(u8),
    /// A specific character was required at this position.
    Expected(char),
    /// No word could be parsed at this position.
    NotAString,
    /// The input does not start with `fun` followed by a space or tab.
    NotAFunDecl,
    /// The input does not start with `set` followed by a space or tab.
    NotAVarAssign,
    /// A `\x` escape was followed by a byte that is not a hex digit.
    NotHexDigit,
}

/// Shorthand for `std::result::Result`, used by the parsing API.
type Result = std::result::Result;

/// Parses `x` in command mode.
///
/// On failure the error is returned together with the part of the input that
/// had not been consumed when the error occurred.
pub fn do_parse(x: &[u8]) -> Res, (ParseError, &[u8])> {
    let mut c = Cursor::new(x, ParseMode::Command);
    match Ast::parse(&mut c) {
        Ok(ast) => Ok(ast),
        Err(e) => Err((e, c.buf)),
    }
}

/// What kind of thing is being completed at the end of the input.
pub enum CompletionKind {
    Command,
    Argument,
    Variable,
    None,
}

/// Result of a completion analysis: the kind of completion and the partial
/// text typed so far.
pub struct CompletionContext {
    pub kind: CompletionKind,
    pub partial: BString,
}

impl CompletionContext {
    /// Context meaning "nothing to complete": kind `None`, empty partial.
    pub fn none() -> Self {
        Self {
            kind: CompletionKind::None,
            partial: BString::new(),
        }
    }
}

impl Ast {
    /// Finds the completion context of the last word of this node: the body of
    /// a function declaration, the value of an assignment (as an argument), or
    /// the last command of a pipeline.
    fn completion(&self, e: &mut E) -> CompletionContext {
        match self {
            Ast::FunDecl(fd) => fd.body.body.completion(e),
            Ast::VarAssign(va) => va.val.completion(e, CompletionKind::Argument),
            Ast::Pipes(p) => p.completion(e),
        }
    }
}

impl ExpString {
    /// Completion context for this word.
    ///
    /// If the word ends in a variable that is not `already_complete`, the
    /// variable name is completed. Otherwise the word is expanded and offered
    /// as a partial of the requested `kind`; a failed expansion yields
    /// `CompletionContext::none()`.
    fn completion(&self, e: &mut E, kind: CompletionKind) -> CompletionContext {
        if let Some(StringPart::Var(var)) = self.parts.last()
            && !var.already_complete
        {
            CompletionContext {
                kind: CompletionKind::Variable,
                partial: var.name.name.clone(),
            }
        } else if let Ok(s) = self.clone().expand(e) {
            CompletionContext { kind, partial: s }
        } else {
            CompletionContext::none()
        }
    }
}

impl Pipes {
    /// Completion context for the last command: its last argument if it has
    /// any, otherwise the command name itself.
    fn completion(&self, e: &mut E) -> CompletionContext {
        let Some(cmd) = self.cmds.last() else {
            return CompletionContext::none();
        };
        if let Some(arg) = cmd.args.last() {
            arg.completion(e, CompletionKind::Argument)
        } else {
            cmd.cmd.completion(e, CompletionKind::Command)
        }
    }
}

/// Parses `x` in completion mode and reports what should be completed at its
/// end.
///
/// Returns `CompletionContext::none()` when parsing fails, or when the last
/// bytes consumed by the parser were whitespace (`cursor.spaced`).
pub fn completion_context<'a, E: Expander>(x: &'a [u8], e: &mut E) -> CompletionContext {
    let mut cursor = Cursor::new(x, ParseMode::Completion);
    let ast = Ast::parse(&mut cursor);
    let Ok(ast) = ast else {
        return CompletionContext::none();
    };
    if cursor.spaced {
        return CompletionContext::none();
    }
    ast.completion(e)
}

/// Implemented by every node that can be parsed from a `Cursor`.
trait Parse: Sized {
    /// Parses a value from the front of `b`, consuming the bytes it uses.
    fn parse(b: &mut Cursor<'_>) -> Result;
}

/// Selects how end-of-input in the middle of a construct is treated; see
/// `Cursor::is_completion` and its callers.
enum ParseMode {
    Command,
    Completion,
}

/// Read position over the input being parsed.
struct Cursor<'a> {
    /// The bytes that have not been consumed yet.
    buf: &'a [u8],
    mode: ParseMode,
    /// `true` if the most recently consumed byte was whitespace skipped by
    /// `spaces`; `adv` and `advance` reset it to `false`.
    spaced: bool,
    /// When set, `bt` prints a short trace of each cursor operation.
    backtrace: bool,
}

impl<'a> Cursor<'a> {
    /// Creates a cursor at the start of `buf`, with tracing disabled.
    fn new(buf: &'a [u8], mode: ParseMode) -> Self {
        Self {
            buf,
            mode,
            spaced: false,
            backtrace: false,
        }
    }
    // non empty
    fn has(&self) -> bool {
        !self.buf.is_empty()
    }
    /// Returns `true` when all input has been consumed.
    fn is_empty(&self) -> bool {
        self.buf.is_empty()
    }
    /// Debug aid: when `backtrace` is set, prints `word`, the next byte and
    /// two lines of the captured backtrace.
    // NOTE(review): `new` always sets `backtrace` to false and nothing visible
    // here enables it. When enabled, `self.buf[0]` below would panic on an
    // empty buffer, including from `advance(0)`.
    fn bt(&self, word: &str) {
        if self.backtrace {
            let bt = std::backtrace::Backtrace::capture();
            let bt = format!("{bt}");
            println!("{word} {}\r", self.buf[0] as char);
            for l in bt.lines().skip(4).take(2) {
                println!("{l}\r");
            }
            println!("\r");
        }
    }
    /// Returns the next byte without consuming it.
    ///
    /// Indexes `buf[0]`, so it panics on an empty buffer.
    fn peek(&self) -> u8 {
        self.bt("peek");
        self.buf[0]
    }
    /// Consumes and returns the next byte, clearing `spaced`.
    ///
    /// Indexes `buf[0]`, so it panics on an empty buffer.
    fn adv(&mut self) -> u8 {
        self.bt("adv");
        let out = self.buf[0];
        self.buf = &self.buf[1..];
        self.spaced = false;
        out
    }
    /// Consumes and returns the next `amt` bytes, clearing `spaced`.
    ///
    /// Slices `buf[..amt]`, so it panics if fewer than `amt` bytes remain.
    fn advance(&mut self, amt: usize) -> &[u8] {
        self.bt(&format!("adv({amt})"));
        let out = &self.buf[..amt];
        self.buf = &self.buf[amt..];
        self.spaced = false;
        out
    }
    /// Returns `true` if the next byte is a space, tab, newline or carriage
    /// return; `false` on an empty buffer.
    fn peek_space(&self) -> bool {
        if self.buf.is_empty() {
            return false;
        }
        matches!(self.buf[0], b' ' | b'\t' | b'\n' | b'\r')
    }
    /// Skips whitespace; `spaced` ends up `true` if at least one byte was
    /// skipped and is left untouched otherwise.
    fn spaces(&mut self) {
        while self.peek_space() {
            // `adv` clears `spaced`, so it is set again after each byte.
            self.adv();
            self.spaced = true;
        }
    }
    /// Returns `true` when parsing for tab completion.
    fn is_completion(&self) -> bool {
        match self.mode {
            ParseMode::Completion => true,
            _ => false,
        }
    }
    /// Parses a `T` from the current position by calling `T::parse`.
    fn parse(&mut self) -> Result {
        T::parse(self)
    }
}

// Tries `VarAssign`, then `FunDecl`, then falls back to `Pipes`.
impl Parse for Ast {
    fn parse(b: &mut Cursor<'_>) -> Result {
        b.spaces();
        let orig_len = b.buf.len();
        let x = VarAssign::parse(b);
        if let Ok(va) = x {
            return Ok(Self::VarAssign(va));
        } else if b.buf.len() != orig_len {
            // The sub-parser consumed input before failing, so the input was
            // an assignment with an error in it: report that error.
            x?;
        }
        let orig_len = b.buf.len();
        let x = FunDecl::parse(b);
        if let Ok(fd) = x {
            return Ok(Self::FunDecl(fd));
        } else if b.buf.len() != orig_len {
            // Same rule as above, for a function declaration.
            x?;
        }
        Ok(Self::Pipes(b.parse()?))
    }
}

// Parses a command name followed by as many arguments as can be read.
impl Parse for Command {
    fn parse(b: &mut Cursor<'_>) -> Result {
        let path: ExpString = b.parse()?;
        let mut args = Vec::new();
        loop {
            match ExpString::parse(b) {
                Ok(arg) => args.push(arg),
                // `NotAString` marks the end of the argument list; any other
                // error is a real failure.
                Err(ParseError::NotAString) => break,
                Err(e) => Err(e)?,
            }
        }
        let x = Ok(Self { cmd: path, args });
        x
    }
}

// Parses one command, then further commands for as long as `|` follows.
impl Parse for Pipes {
    fn parse(b: &mut Cursor<'_>) -> Result {
        let mut cmds: Vec> = vec![b.parse()?];
        loop {
            b.spaces();
            if b.is_empty() {
                return Ok(Pipes { cmds });
            }
            let c = b.peek();
            if c == b'|' {
                b.adv();
                cmds.push(b.parse()?);
            } else if is_symbol(c) {
                // Any other symbol ends the pipeline and is left unconsumed
                // for the caller.
                return Ok(Pipes { cmds });
            } else {
                Err(ParseError::Unknown(c))?;
            }
        }
    }
}